Skip to main content

panache_parser/syntax/
kind.rs

1//! Syntax kinds and language definition for the Quarto/Pandoc CST.
2
3use rowan::Language;
4
/// Every token and node kind that can appear in the Quarto/Pandoc CST.
///
/// The enum is `#[repr(u16)]`: `WHITESPACE` is pinned to `0` and every later
/// variant takes the next consecutive value, so declaration order defines the
/// raw kind numbers. `PanacheLanguage::kind_from_raw` transmutes a raw `u16`
/// straight back into this enum, which is only valid for values up to the
/// final variant (`UNRESOLVED_REFERENCE`). Keep the discriminants contiguous:
/// do not assign explicit values that would leave gaps.
// Variants are spelled in SCREAMING_SNAKE_CASE, which trips the
// `non_camel_case_types` lint; that is why the lint is allowed here.
5#[allow(non_camel_case_types)]
6#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
7#[repr(u16)]
8pub enum SyntaxKind {
9    // Tokens
10    WHITESPACE = 0,
11    NEWLINE,
12    TEXT,
13    BACKSLASH,         // \ (for escaping)
14    ESCAPED_CHAR,      // Any escaped character
15    NONBREAKING_SPACE, // \<space>
16    HARD_LINE_BREAK,   // \<newline>
17    DIV_MARKER,        // :::
18
19    // YAML tokens (metadata and in-tree YAML CST parser)
20    YAML_METADATA_DELIM, // --- or ... (for YAML blocks)
21    YAML_KEY,            // YAML mapping key token
22    YAML_COLON,          // YAML mapping key-value separator
23    YAML_TAG,            // YAML explicit tag token (e.g. !!str)
24    YAML_ANCHOR,         // YAML anchor token (e.g. &name)
25    YAML_ALIAS,          // YAML alias token (e.g. *name)
26    YAML_SCALAR_TEXT,    // YAML scalar content fragment (one physical line of a scalar)
27    YAML_FLOW_INDICATOR, // YAML flow structural punctuation ([ ] { } ,)
28    YAML_DIRECTIVE,      // YAML directive line (%YAML, %TAG)
29    YAML_COMMENT,        // YAML inline comment token
30    YAML_LINE_PREFIX,    // Embedded-YAML per-line prefix trivia (hashpipe `#|`)
31    YAML_DOCUMENT_START, // YAML document start marker (---)
32    YAML_DOCUMENT_END,   // YAML document end marker (...)
33
34    BLOCK_QUOTE_MARKER, // >
35    ALERT_MARKER,       // [!NOTE], [!TIP], etc.
36    IMAGE_LINK_START,   // ![
37    LIST_MARKER,        // - + *
38    TASK_CHECKBOX,      // [ ] or [x] or [X]
39    COMMENT_START,      // <!--
40    COMMENT_END,        // -->
41    ATTRIBUTE,          // {#label} for headings, math, etc.
42    // Structured children of a Pandoc `{...}` ATTRIBUTE. Each wraps the
43    // existing source bytes (markers/quotes included); the projector strips
44    // them. Absent on opaque ATTRIBUTE forms (MMD `[#id]`, raw-inline
45    // `{=format}`, fallback), which keep a single inner ATTRIBUTE token.
46    ATTR_ID,         // #id (token text includes the leading '#')
47    ATTR_CLASS,      // .class (token text includes the leading '.')
48    ATTR_KEY_VALUE,  // key=value (node grouping the pieces below)
49    ATTR_KEY,        // key (token, no '=')
50    ATTR_VALUE,      // value or "value"/'value' (token text includes quotes)
51    HORIZONTAL_RULE, // --- or *** or ___
52    BLANK_LINE,
53
54    // Links and images
55    LINK_START,           // [
56    LINK,                 // [text](url)
57    LINK_TEXT,            // text part of link
58    LINK_TEXT_END,        // ] closing link text
59    LINK_DEST_START,      // ( opening link destination
60    LINK_DEST,            // (url) or (url "title")
61    LINK_DEST_END,        // ) closing link destination
62    LINK_REF,             // [ref] in reference links
63    IMAGE_LINK,           // ![alt](url)
64    IMAGE_ALT,            // alt text in image
65    IMAGE_ALT_END,        // ] closing image alt
66    IMAGE_DEST_START,     // ( opening image destination
67    IMAGE_DEST_END,       // ) closing image destination
68    AUTO_LINK,            // <http://example.com>
69    AUTO_LINK_MARKER,     // < and >
70    REFERENCE_DEFINITION, // [label]: url "title"
71    FOOTNOTE_DEFINITION,  // [^id]: content
72    FOOTNOTE_REFERENCE,   // [^id]
73    FOOTNOTE_LABEL_START, // [^
74    FOOTNOTE_LABEL_ID,    // id in [^id] or [^id]:
75    FOOTNOTE_LABEL_END,   // ]
76    FOOTNOTE_LABEL_COLON, // :
77    REFERENCE_LABEL,      // [label] part
78    REFERENCE_URL,        // url part
79    REFERENCE_TITLE,      // "title" part
80
81    // Wikilinks (Pandoc `wikilinks_title_{after,before}_pipe` extensions)
82    WIKI_LINK,       // [[url]] or [[url|title]]
83    IMAGE_WIKI_LINK, // ![[url]] or ![[url|title]]
84    WIKI_LINK_OPEN,  // [[ or ![[
85    WIKI_LINK_URL,   // URL slot (raw TEXT child, no inline parsing)
86    WIKI_LINK_PIPE,  // | separator
87    WIKI_LINK_TITLE, // title slot (raw TEXT child, no inline parsing)
88    WIKI_LINK_CLOSE, // ]]
89
90    // Math
91    INLINE_MATH_MARKER,  // $
92    DISPLAY_MATH_MARKER, // $$
93    INLINE_MATH,
94    DISPLAY_MATH,
95    MATH_CONTENT, // wrapper node for parsed TeX math content (subtree root)
96
97    // Math content (structural TeX CST under MATH_CONTENT)
98    MATH_GROUP,       // { ... } brace group (node)
99    MATH_ENVIRONMENT, // \begin{env} ... \end{env} (node)
100    MATH_GROUP_OPEN,  // {
101    MATH_GROUP_CLOSE, // }
102    MATH_COMMAND,     // \foo control word or \% control symbol
103    MATH_LINE_BREAK,  // \\
104    MATH_ALIGN,       // & alignment tab
105    MATH_SCRIPT,      // ^ or _
106    MATH_COMMENT,     // % to end of line (TeX comment)
107    // `+ - * = < >` operator atom; one token per char. Class/precedence are
108    // contextual (unary minus, `\mathbin`) and deferred to the formatter.
109    MATH_OPERATOR,
110    MATH_TEXT,    // run of ordinary atoms
111    MATH_SPACE,   // run of spaces/tabs
112    MATH_NEWLINE, // newline within math content
113    // Bookdown equation label `(\#eq:label)`, recognized only when the
114    // `bookdown_equation_references` extension is enabled. A single token over
115    // the whole `(\#eq:...)` span so the indexer/LSP can target it precisely.
116    MATH_EQUATION_LABEL,
117
118    // Footnotes
119    INLINE_FOOTNOTE_START, // ^[
120    INLINE_FOOTNOTE_END,   // ]
121    INLINE_FOOTNOTE,       // ^[text]
122
123    // Citations
124    CITATION,                // [@key] or @key
125    CITATION_MARKER,         // @ or -@
126    CITATION_KEY,            // The citation key identifier
127    CITATION_BRACE_OPEN,     // { for complex keys
128    CITATION_BRACE_CLOSE,    // } for complex keys
129    CITATION_CONTENT,        // Text content in bracketed citations
130    CITATION_SEPARATOR,      // ; between multiple citations
131    CROSSREF,                // Quarto cross-reference: @fig-*, @eq-*, etc.
132    CROSSREF_MARKER,         // @ or -@ for cross-references
133    CROSSREF_KEY,            // Cross-reference key identifier
134    CROSSREF_BRACE_OPEN,     // { for braced cross-reference keys
135    CROSSREF_BRACE_CLOSE,    // } for braced cross-reference keys
136    CROSSREF_BOOKDOWN_OPEN,  // \@ref(
137    CROSSREF_BOOKDOWN_CLOSE, // )
138
139    // Spans
140    BRACKETED_SPAN,     // [text]{.class}
141    SPAN_CONTENT,       // text inside span
142    SPAN_ATTRIBUTES,    // {.class key="val"}
143    SPAN_BRACKET_OPEN,  // [
144    SPAN_BRACKET_CLOSE, // ]
145
146    // Shortcodes (Quarto)
147    SHORTCODE,              // {{< name args >}} or {{{< name args >}}}
148    SHORTCODE_MARKER_OPEN,  // {{< or {{{<
149    SHORTCODE_MARKER_CLOSE, // >}} or >}}}
150    SHORTCODE_CONTENT,      // content between markers
151
152    // Code
153    INLINE_CODE,
154    INLINE_CODE_MARKER,  // ` or `` or ```
155    INLINE_CODE_CONTENT, // Literal inline code content
156    INLINE_EXEC,         // Inline executable code span variants
157    INLINE_EXEC_MARKER,  // Backtick markers delimiting inline executable code
158    INLINE_EXEC_LANG,    // Runtime marker (`r` or `{r}`)
159    INLINE_EXEC_CONTENT, // Executable inline code expression
160    CODE_FENCE_MARKER,   // ``` or ~~~
161    CODE_BLOCK,
162
163    // Raw inline spans
164    RAW_INLINE,         // `content`{=format}
165    RAW_INLINE_MARKER,  // ` markers
166    RAW_INLINE_FORMAT,  // format name (html, latex, etc.)
167    RAW_INLINE_CONTENT, // raw content
168
169    // Inline emphasis and formatting
170    EMPHASIS,           // *text* or _text_
171    STRONG,             // **text** or __text__
172    STRIKEOUT,          // ~~text~~
173    MARK,               // ==text==
174    SUPERSCRIPT,        // ^text^
175    SUBSCRIPT,          // ~text~
176    EMPHASIS_MARKER,    // * or _ (for emphasis)
177    STRONG_MARKER,      // ** or __ (for strong)
178    STRIKEOUT_MARKER,   // ~~ (for strikeout)
179    MARK_MARKER,        // == (for mark/highlight)
180    SUPERSCRIPT_MARKER, // ^ (for superscript)
181    SUBSCRIPT_MARKER,   // ~ (for subscript)
182
183    // Composite nodes
184    DOCUMENT,
185
186    // YAML nodes
187    YAML_METADATA,
188    YAML_METADATA_CONTENT,    // Content lines inside YAML metadata block
189    YAML_STREAM, // YAML 1.2 stream wrapper (zero or more YAML_DOCUMENT children + trivia)
190    YAML_DOCUMENT, // a single YAML document (markers + body)
191    YAML_SCALAR, // YAML scalar value node (wraps YAML_SCALAR_TEXT content + NEWLINE/prefix leaves)
192    YAML_BLOCK_MAP, // YAML block mapping container
193    YAML_BLOCK_MAP_ENTRY, // YAML block mapping entry (key: value)
194    YAML_BLOCK_MAP_KEY, // YAML block mapping key wrapper
195    YAML_BLOCK_MAP_VALUE, // YAML block mapping value wrapper
196    YAML_FLOW_MAP, // YAML flow mapping container ({key: value, ...})
197    YAML_FLOW_MAP_ENTRY, // YAML flow mapping entry
198    YAML_FLOW_MAP_KEY, // YAML flow mapping key wrapper
199    YAML_FLOW_MAP_VALUE, // YAML flow mapping value wrapper
200    YAML_FLOW_SEQUENCE, // YAML flow sequence container ([a, b, ...])
201    YAML_FLOW_SEQUENCE_ITEM, // YAML flow sequence item wrapper
202    YAML_BLOCK_SEQUENCE, // YAML block sequence container (- item ...)
203    YAML_BLOCK_SEQUENCE_ITEM, // YAML block sequence item wrapper
204    YAML_BLOCK_SEQ_ENTRY, // YAML block sequence entry marker (-)
205
206    PANDOC_TITLE_BLOCK,
207    MMD_TITLE_BLOCK,
208    FENCED_DIV,
209    PARAGRAPH,
210    PLAIN, // Inline content without paragraph break (tight lists, definition lists, table cells)
211    BLOCK_QUOTE,
212    ALERT,
213    LIST,
214    LIST_ITEM,
215    DEFINITION_LIST,
216    DEFINITION_ITEM,
217    TERM,
218    DEFINITION,
219    DEFINITION_MARKER, // : or ~
220    LINE_BLOCK,
221    LINE_BLOCK_LINE,
222    LINE_BLOCK_MARKER, // |
223    COMMENT,
224    FIGURE, // Standalone image (Pandoc figure)
225
226    // HTML blocks
227    HTML_BLOCK,         // Generic HTML block
228    HTML_BLOCK_TAG,     // Opening/closing tags
229    HTML_BLOCK_CONTENT, // Content between tags
230    // Pandoc-dialect lift: a matched <div ...>...</div> block.
231    HTML_BLOCK_DIV,
232    // Structural region inside an HTML opening tag holding the
233    // attribute-list bytes — i.e. everything between the tag name and
234    // the closing `>`, exclusive. Recognized by `AttributeNode::cast`,
235    // so the salsa anchor index sees `id`/`class`/key=val attrs from
236    // `<div id="x">` blocks via the same walk that handles fenced-div
237    // and heading attributes.
238    HTML_ATTRS,
239
240    // Inline raw HTML (CommonMark §6.6 / Pandoc raw_html). One node per HTML
241    // tag/comment/declaration/PI/CDATA span; child token holds the verbatim
242    // bytes of the span.
243    INLINE_HTML,
244    INLINE_HTML_CONTENT,
245    // Pandoc-dialect inline lift: a matched <span ...>...</span> tag pair,
246    // mirroring HTML_BLOCK_DIV at the inline level. The open tag's
247    // attribute region is exposed structurally as HTML_ATTRS so the
248    // existing AttributeNode walk picks up `<span id>` ids automatically.
249    INLINE_HTML_SPAN,
250
251    // TeX blocks
252    TEX_BLOCK, // Raw tex block (e.g., LaTeX commands)
253
254    // Headings
255    HEADING,
256    HEADING_CONTENT,
257    ATX_HEADING_MARKER,       // leading #####
258    SETEXT_HEADING_UNDERLINE, // ===== or -----
259
260    // LaTeX inline commands
261    LATEX_COMMAND, // \command{...}
262
263    // Tables
264    SIMPLE_TABLE,
265    MULTILINE_TABLE,
266    PIPE_TABLE,
267    GRID_TABLE,
268    TABLE_HEADER,
269    TABLE_FOOTER,
270    TABLE_SEPARATOR,
271    TABLE_ROW,
272    TABLE_CELL,
273    TABLE_CAPTION,
274    TABLE_CAPTION_PREFIX, // "Table: ", "table: ", or ": "
275
276    // Code block parts
277    CODE_FENCE_OPEN,
278    CODE_FENCE_CLOSE,
279    CODE_INFO,     // Raw info string (preserved for lossless formatting)
280    CODE_LANGUAGE, // Parsed language identifier (r, python, etc.)
281
282    // Chunk options (for executable chunks like {r, echo=TRUE})
283    CHUNK_OPTIONS,          // Container for all chunk options
284    CHUNK_OPTION,           // Single option (key=value pair)
285    CHUNK_OPTION_KEY,       // Option name (e.g., echo, fig.cap)
286    CHUNK_OPTION_VALUE,     // Option value (e.g., TRUE, "text")
287    CHUNK_OPTION_QUOTE,     // Quote character (" or ') if present
288    CHUNK_LABEL,            // Special case: unlabeled first option in {r mylabel}
289    HASHPIPE_YAML_PREAMBLE, // Hashpipe YAML option preamble region inside CODE_CONTENT
290    HASHPIPE_YAML_CONTENT,  // Content lines belonging to hashpipe YAML preamble
291    HASHPIPE_PREFIX,        // Hashpipe option marker prefix (e.g., #|, //|, --|)
292
293    CODE_CONTENT,
294
295    // Div parts
296    DIV_FENCE_OPEN,
297    DIV_FENCE_CLOSE,
298    DIV_INFO,
299    DIV_CONTENT,
300    EMOJI, // :alias:
301
302    // Bracket-shape pattern that did not resolve as a link/image.
303    // Distinct from LINK/IMAGE_LINK so downstream tools (linter, LSP) can
304    // walk a typed wrapper without the parser having to lie about
305    // resolution. `is_image()` on the typed wrapper distinguishes
306    // `[foo]` from `![foo]` shapes.
    //
    // This is currently the final variant, i.e. its discriminant is the
    // largest raw `u16` that corresponds to a valid `SyntaxKind`.
307    UNRESOLVED_REFERENCE,
308}
309
310impl From<SyntaxKind> for rowan::SyntaxKind {
311    fn from(kind: SyntaxKind) -> Self {
312        Self(kind as u16)
313    }
314}
315
/// Uninhabited marker type for the Panache syntax tree.
///
/// It has no variants and therefore no values; it exists only as the type
/// that carries the `rowan::Language` implementation tying [`SyntaxKind`] to
/// rowan's raw kinds.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum PanacheLanguage {}
318
319impl Language for PanacheLanguage {
320    type Kind = SyntaxKind;
321
322    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
323        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
324    }
325
326    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
327        kind.into()
328    }
329}