panache 2.60.0

An LSP, formatter, and linter for Markdown, Quarto, and R Markdown
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
use crate::config::Extensions;
use crate::syntax::{AstNode, Heading, SyntaxKind, SyntaxNode};
use rowan::NodeOrToken;
use std::collections::HashMap;

/// Report whether `kind` counts as a block-level element for formatting.
///
/// The formatter consults this to decide where blank lines belong between
/// neighbouring elements.
pub fn is_block_element(kind: SyntaxKind) -> bool {
    // Every syntax kind the formatter treats as a standalone block.
    const BLOCK_KINDS: &[SyntaxKind] = &[
        SyntaxKind::PARAGRAPH,
        SyntaxKind::FIGURE,
        SyntaxKind::HEADING,
        SyntaxKind::LIST,
        SyntaxKind::DEFINITION_LIST,
        SyntaxKind::BLOCK_QUOTE,
        SyntaxKind::CODE_BLOCK,
        SyntaxKind::SIMPLE_TABLE,
        SyntaxKind::MULTILINE_TABLE,
        SyntaxKind::PIPE_TABLE,
        SyntaxKind::LINE_BLOCK,
    ];

    BLOCK_KINDS.contains(&kind)
}

/// A code block with its location in the document.
///
/// Produced by `collect_code_blocks` for fenced code blocks and for MyST
/// directives that carry a verbatim body.
#[derive(Debug, Clone)]
pub struct CodeBlock {
    /// Programming language of the block, with a single leading `.` stripped
    /// (so `.python` and `python` yield the same key).
    pub language: String,
    /// Content of the code block (without fences)
    pub content: String,
    /// Line number in the document (1-indexed) at which `original_range`
    /// starts, as computed by `offset_to_line`.
    pub start_line: usize,
    /// Byte offset range of the content in the original document
    pub original_range: std::ops::Range<usize>,
}

/// Walk `tree` and gather every lintable code block, keyed by its language.
///
/// Both fenced code blocks and MyST directives are considered; nodes for
/// which no language or no content can be extracted are skipped. Blocks
/// within one language keep the order in which they appear in the tree.
pub fn collect_code_blocks(tree: &SyntaxNode, input: &str) -> HashMap<String, Vec<CodeBlock>> {
    // Lazily turn each interesting node into a `CodeBlock`, dropping the rest.
    let extracted = tree.descendants().filter_map(|node| match node.kind() {
        SyntaxKind::CODE_BLOCK => extract_code_block(&node, input),
        SyntaxKind::MYST_DIRECTIVE => extract_myst_directive_block(&node, input),
        _ => None,
    });

    let mut by_language: HashMap<String, Vec<CodeBlock>> = HashMap::new();
    for found in extracted {
        let bucket = by_language.entry(found.language.clone()).or_default();
        bucket.push(found);
    }
    by_language
}

/// Turn a verbatim MyST directive into a lintable [`CodeBlock`].
///
/// The language is taken from the directive argument (e.g. `python` in
/// `` ```{code-block} python ``), with one leading `.` removed. The result is
/// `None` when the directive has no `MYST_DIRECTIVE_BODY` child (i.e. it is
/// not verbatim), or when either the language or the body text is empty.
/// The body's byte range is kept so external diagnostics can be mapped back
/// onto the original source.
fn extract_myst_directive_block(node: &SyntaxNode, input: &str) -> Option<CodeBlock> {
    let mut language: Option<String> = None;
    let mut body: Option<SyntaxNode> = None;

    for child in node.children() {
        let kind = child.kind();
        if kind == SyntaxKind::MYST_DIRECTIVE_OPEN {
            // Only argument tokens matter; if several exist the last one wins.
            let args = child
                .children_with_tokens()
                .filter_map(|element| match element {
                    NodeOrToken::Token(token) => Some(token),
                    NodeOrToken::Node(_) => None,
                })
                .filter(|token| token.kind() == SyntaxKind::MYST_DIRECTIVE_ARG);
            for arg in args {
                let text = arg.text();
                language = Some(text.strip_prefix('.').unwrap_or(text).to_string());
            }
        } else if kind == SyntaxKind::MYST_DIRECTIVE_BODY {
            body = Some(child);
        }
    }

    // Both pieces must be present and non-empty for the block to be lintable.
    let language = language.filter(|lang| !lang.is_empty())?;
    let body = body?;
    let content = body.text().to_string();
    if content.is_empty() {
        return None;
    }

    let span = body.text_range();
    let (start, end): (usize, usize) = (span.start().into(), span.end().into());

    Some(CodeBlock {
        language,
        content,
        start_line: offset_to_line(input, start),
        original_range: start..end,
    })
}

/// Turn a fenced `CODE_BLOCK` node into a lintable [`CodeBlock`].
///
/// The language comes from the first `CODE_LANGUAGE` token inside a
/// `CODE_INFO` node of the opening fence (one leading `.` is stripped); the
/// content and its byte range come from the `CODE_CONTENT` child. Returns
/// `None` when no language token exists or when the language or the content
/// is empty.
fn extract_code_block(node: &SyntaxNode, input: &str) -> Option<CodeBlock> {
    let mut language: Option<String> = None;
    let mut content = String::new();
    // (start, end) byte offsets of the code itself, excluding the fences.
    let mut content_span: Option<(usize, usize)> = None;

    for child in node.children() {
        match child.kind() {
            SyntaxKind::CODE_FENCE_OPEN => {
                let info_nodes = child
                    .children()
                    .filter(|candidate| candidate.kind() == SyntaxKind::CODE_INFO);
                for info in info_nodes {
                    // Within one info node only the first language token counts.
                    let first_language =
                        info.children_with_tokens()
                            .find_map(|element| match element {
                                NodeOrToken::Token(token)
                                    if token.kind() == SyntaxKind::CODE_LANGUAGE =>
                                {
                                    Some(token)
                                }
                                _ => None,
                            });
                    if let Some(token) = first_language {
                        let raw = token.text();
                        language = Some(raw.strip_prefix('.').unwrap_or(raw).to_string());
                    }
                }
            }
            SyntaxKind::CODE_CONTENT => {
                content = child.text().to_string();
                let span = child.text_range();
                content_span = Some((span.start().into(), span.end().into()));
            }
            _ => {}
        }
    }

    let language = language?;

    // Nothing to lint without both a language and some code.
    if language.is_empty() || content.is_empty() {
        return None;
    }

    // Prefer the content span so the line number points past the fence line;
    // fall back to the whole block's range if no content span was recorded.
    let (start, end) = content_span.unwrap_or_else(|| {
        let whole = node.text_range();
        (whole.start().into(), whole.end().into())
    });

    Some(CodeBlock {
        language,
        content,
        start_line: offset_to_line(input, start),
        original_range: start..end,
    })
}

/// Convert a byte offset into a 1-indexed line number.
///
/// The line number is the count of `\n` bytes that precede `offset`, plus one.
///
/// This function never panics: an `offset` beyond the end of `input` is
/// clamped to `input.len()`, and an offset that falls inside a multi-byte
/// UTF-8 character is accepted as-is (the scan works on raw bytes, and `\n`
/// can never occur inside a multi-byte sequence).
pub fn offset_to_line(input: &str, offset: usize) -> usize {
    // Clamp so that stale or out-of-range offsets cannot cause a slice panic.
    let end = offset.min(input.len());
    let newlines_before = input.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before + 1
}

/// Produce the canonical form of a label for case-insensitive lookup.
///
/// Leading and trailing whitespace is dropped, every internal whitespace run
/// becomes a single space, and the result is lowercased. Reference
/// definitions and footnote IDs are compared through this form.
pub fn normalize_label(label: &str) -> String {
    let mut collapsed = String::with_capacity(label.len());

    for word in label.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }

    // Lowercase the joined text as a whole, exactly once.
    collapsed.to_lowercase()
}

/// Produce the canonical form of a cross-reference or anchor label.
///
/// Only surrounding whitespace is removed; in contrast to `normalize_label`,
/// letter case and internal whitespace are left untouched, so matching stays
/// case-sensitive.
pub fn normalize_anchor_label(label: &str) -> String {
    let trimmed = label.trim();
    trimmed.to_owned()
}

/// List the labels under which a cross-reference to `label` may resolve.
///
/// The first entry is always the trimmed label itself. When
/// `bookdown_references` is enabled and the label carries a bookdown prefix
/// (as judged by `has_bookdown_prefix`), the non-empty part after the first
/// `:` is appended as a second candidate.
pub fn crossref_resolution_labels(label: &str, bookdown_references: bool) -> Vec<String> {
    use crate::parser::inlines::citations::has_bookdown_prefix;

    let exact = normalize_anchor_label(label);
    if !bookdown_references {
        return vec![exact];
    }

    // `prefix:rest` -> `rest`, but only for recognised bookdown prefixes.
    let unprefixed = exact
        .split_once(':')
        .map(|(_, rest)| rest)
        .filter(|rest| !rest.is_empty())
        .filter(|_| has_bookdown_prefix(&exact))
        .filter(|rest| *rest != exact.as_str())
        .map(str::to_string);

    let mut labels = vec![exact];
    labels.extend(unprefixed);
    labels
}

/// List the labels under which a symbol named `label` should be indexed.
///
/// Starts from `crossref_resolution_labels`. When `bookdown_references` is
/// enabled and the label has no bookdown prefix of its own, one
/// `prefix:label` variant is appended for every entry of
/// `BOOKDOWN_LABEL_PREFIXES`, skipping variants already in the list.
#[cfg(feature = "lsp")]
pub fn crossref_symbol_labels(label: &str, bookdown_references: bool) -> Vec<String> {
    use crate::parser::inlines::citations::{BOOKDOWN_LABEL_PREFIXES, has_bookdown_prefix};

    let mut labels = crossref_resolution_labels(label, bookdown_references);

    if bookdown_references {
        let bare = normalize_anchor_label(label);
        // An already-prefixed label gets no additional prefixed variants.
        if !has_bookdown_prefix(&bare) {
            for prefix in BOOKDOWN_LABEL_PREFIXES {
                let candidate = format!("{}:{}", prefix, bare);
                if !labels.contains(&candidate) {
                    labels.push(candidate);
                }
            }
        }
    }

    labels
}

/// An automatically generated identifier paired with the heading it belongs to.
///
/// Produced by `implicit_heading_ids`.
#[derive(Debug, Clone)]
pub struct ImplicitHeadingId {
    /// The generated identifier: the heading's slug, followed by `-N` when the
    /// same slug was already produced `N` times earlier in the document.
    pub id: String,
    /// Syntax node of the heading the identifier was derived from.
    pub heading: SyntaxNode,
}

/// Compute the implicit (auto-generated) identifier of every heading in `tree`.
///
/// Heading text is normalized with `normalize_label` and slugified with
/// `heading_slugify`; headings whose text or slug ends up empty are skipped.
/// The first heading with a given slug gets the slug itself, later ones get
/// `slug-1`, `slug-2`, … in document order.
pub fn implicit_heading_ids(tree: &SyntaxNode, extensions: &Extensions) -> Vec<ImplicitHeadingId> {
    // How many times each base slug has been handed out so far.
    let mut occurrences: HashMap<String, usize> = HashMap::new();

    tree.descendants()
        .filter_map(Heading::cast)
        .filter_map(|heading| {
            let raw_text = heading
                .content()
                .map(|content| content.text())
                .unwrap_or_default();

            let normalized = normalize_label(&raw_text);
            if normalized.is_empty() {
                return None;
            }

            let base = heading_slugify(&normalized, extensions);
            if base.is_empty() {
                return None;
            }

            let seen_before = occurrences.entry(base.clone()).or_insert(0);
            let id = match *seen_before {
                0 => base,
                n => format!("{}-{}", base, n),
            };
            *seen_before += 1;

            Some(ImplicitHeadingId {
                id,
                heading: heading.syntax().clone(),
            })
        })
        .collect()
}

/// Generate an auto identifier from heading text based on extension settings.
pub fn heading_slugify(text: &str, extensions: &Extensions) -> String {
    if extensions.gfm_auto_identifiers {
        gfm_slugify(text)
    } else {
        pandoc_slugify(text)
    }
}

/// Shared slug builder behind [`gfm_slugify`] and [`pandoc_slugify`].
///
/// Rules, applied character by character:
/// - a whitespace character becomes a single `-`, unless the output is still
///   empty or the previous emitted character was already a `-`;
/// - every other character is lowercased and kept only if it is alphanumeric
///   or listed in `extra_allowed`; anything else is dropped;
/// - trailing `-` characters are removed from the final result.
fn slugify_keeping(text: &str, extra_allowed: &[char]) -> String {
    let mut out = String::with_capacity(text.len());
    // Tracks whether the last *emitted* character was a dash. Dropped
    // characters deliberately do not reset it, so "a ! b" yields "a-b".
    let mut prev_dash = false;

    for ch in text.chars() {
        if ch.is_whitespace() {
            if !out.is_empty() && !prev_dash {
                out.push('-');
                prev_dash = true;
            }
            continue;
        }

        // Lowercasing may expand one character into several.
        for lc in ch.to_lowercase() {
            if lc.is_alphanumeric() || extra_allowed.contains(&lc) {
                out.push(lc);
                prev_dash = lc == '-';
            }
        }
    }

    let kept_len = out.trim_end_matches('-').len();
    out.truncate(kept_len);
    out
}

/// Generate a GitHub-style auto identifier from heading text.
///
/// Keeps alphanumerics, `_` and `-`; whitespace runs become a single `-`;
/// everything else (including `.`) is removed. The result is lowercase and
/// never ends in `-`.
pub fn gfm_slugify(text: &str) -> String {
    slugify_keeping(text, &['_', '-'])
}

/// Generate a Pandoc-style auto identifier from heading text.
///
/// Identical to [`gfm_slugify`] except that `.` is also kept.
///
/// NOTE(review): Pandoc's manual additionally describes removing everything
/// before the first letter and falling back to `section` for an empty result;
/// this function does neither — confirm whether that is intentional.
pub fn pandoc_slugify(text: &str) -> String {
    slugify_keeping(text, &['_', '-', '.'])
}

// Unit tests for cross-reference label expansion and implicit heading IDs.
#[cfg(test)]
mod tests {
    #[cfg(feature = "lsp")]
    use super::crossref_symbol_labels;
    use super::{crossref_resolution_labels, implicit_heading_ids};

    // With bookdown references disabled, only the label itself is returned.
    #[test]
    fn crossref_resolution_labels_keep_exact_match() {
        let labels = crossref_resolution_labels("fig-plot", false);
        assert_eq!(labels, vec!["fig-plot".to_string()]);
    }

    // A bookdown-prefixed label also resolves via the part after the colon.
    #[test]
    fn crossref_resolution_labels_include_unprefixed_bookdown_key() {
        let labels = crossref_resolution_labels("fig:plot", true);
        assert_eq!(labels, vec!["fig:plot".to_string(), "plot".to_string()]);
    }

    // An unprefixed symbol is indexed under itself and under prefixed variants.
    #[cfg(feature = "lsp")]
    #[test]
    fn crossref_symbol_labels_include_bookdown_prefixed_variants() {
        let labels = crossref_symbol_labels("plot", true);
        assert!(labels.iter().any(|label| label == "plot"));
        assert!(labels.iter().any(|label| label == "fig:plot"));
        assert!(labels.iter().any(|label| label == "tab:plot"));
    }

    // Repeated headings get `-1`, `-2`, ... suffixes in document order.
    #[test]
    fn implicit_heading_ids_use_pandoc_duplicate_suffixes() {
        let tree = crate::parse("# Heading\n\n# Heading\n\n# Heading\n", None);
        let ids = implicit_heading_ids(&tree, &crate::config::Extensions::default())
            .into_iter()
            .map(|entry| entry.id)
            .collect::<Vec<_>>();
        assert_eq!(ids, vec!["heading", "heading-1", "heading-2"]);
    }

    // With `gfm_auto_identifiers` on, the `.` is dropped and the leading digit kept.
    #[test]
    fn implicit_heading_ids_use_gfm_slug_algorithm_when_enabled() {
        let tree = crate::parse("# 3. Applications\n", None);
        let ext = crate::config::Extensions {
            gfm_auto_identifiers: true,
            ..crate::config::Extensions::default()
        };
        let ids = implicit_heading_ids(&tree, &ext)
            .into_iter()
            .map(|entry| entry.id)
            .collect::<Vec<_>>();
        assert_eq!(ids, vec!["3-applications"]);
    }
}