1use std::cell::RefCell;
19use std::collections::{HashMap, HashSet};
20
21use crate::SyntaxNode;
22use crate::syntax::SyntaxKind;
23use rowan::NodeOrToken;
24use serde_json::{Value, json};
25
26const PANDOC_API_VERSION: [u32; 4] = [1, 23, 1, 1];
32
33#[derive(Default)]
34struct RefsCtx {
35 refs: HashMap<String, (String, String)>,
36 heading_ids: HashSet<String>,
37 heading_id_by_offset: HashMap<u32, String>,
42 footnotes: HashMap<String, Vec<Block>>,
46 example_label_to_num: HashMap<String, usize>,
51 example_list_start_by_offset: HashMap<u32, usize>,
56 cite_note_num_by_offset: HashMap<u32, i64>,
60}
61
62thread_local! {
63 static REFS_CTX: RefCell<RefsCtx> = RefCell::new(RefsCtx::default());
64}
65
66pub fn to_pandoc_ast(tree: &SyntaxNode) -> String {
74 let ctx = build_refs_ctx(tree);
75 REFS_CTX.with(|c| *c.borrow_mut() = ctx);
76 let blocks = blocks_from_doc(tree);
77 let mut out = String::new();
78 out.push('[');
79 for (i, b) in blocks.iter().enumerate() {
80 if i > 0 {
81 out.push(',');
82 }
83 out.push(' ');
84 write_block(b, &mut out);
85 }
86 out.push_str(" ]");
87 REFS_CTX.with(|c| *c.borrow_mut() = RefsCtx::default());
88 out
89}
90
91pub fn to_pandoc_json(tree: &SyntaxNode) -> String {
111 let ctx = build_refs_ctx(tree);
112 REFS_CTX.with(|c| *c.borrow_mut() = ctx);
113 let blocks = blocks_from_doc(tree);
114 let blocks_json: Vec<Value> = blocks.iter().map(block_to_json).collect();
115 REFS_CTX.with(|c| *c.borrow_mut() = RefsCtx::default());
116 let doc = json!({
117 "pandoc-api-version": PANDOC_API_VERSION,
118 "meta": {},
119 "blocks": blocks_json,
120 });
121 serde_json::to_string(&doc).expect("pandoc-json serialization is infallible")
122}
123
124fn build_refs_ctx(tree: &SyntaxNode) -> RefsCtx {
125 build_refs_ctx_inherited(tree, None)
126}
127
128fn build_refs_ctx_inherited(tree: &SyntaxNode, parent: Option<&RefsCtx>) -> RefsCtx {
129 let mut ctx = RefsCtx::default();
130 collect_cite_note_nums(tree, &mut ctx);
131 let mut example_counter: usize = 0;
132 collect_example_numbering(tree, &mut ctx, &mut example_counter);
133 REFS_CTX.with(|c| {
134 let mut borrowed = c.borrow_mut();
135 borrowed.cite_note_num_by_offset = ctx.cite_note_num_by_offset.clone();
136 borrowed.example_label_to_num = ctx.example_label_to_num.clone();
137 borrowed.example_list_start_by_offset = ctx.example_list_start_by_offset.clone();
138 });
139 let mut seen_ids: HashMap<String, u32> = HashMap::new();
144 if let Some(p) = parent {
145 for id in &p.heading_ids {
146 if let Some(idx) = id.rfind('-')
147 && let Ok(n) = id[idx + 1..].parse::<u32>()
148 {
149 let base = id[..idx].to_string();
150 let entry = seen_ids.entry(base).or_insert(0);
151 *entry = (*entry).max(n + 1);
152 }
153 let entry = seen_ids.entry(id.clone()).or_insert(0);
154 *entry = (*entry).max(1);
155 }
156 }
157 collect_refs_and_headings(tree, &mut ctx, &mut seen_ids);
158 if let Some(p) = parent {
164 for (k, v) in &p.refs {
165 ctx.refs.entry(k.clone()).or_insert_with(|| v.clone());
166 }
167 for (k, v) in &p.footnotes {
168 ctx.footnotes.entry(k.clone()).or_insert_with(|| v.clone());
169 }
170 for id in &p.heading_ids {
171 ctx.heading_ids.insert(id.clone());
172 }
173 }
174 ctx
175}
176
177fn collect_cite_note_nums(tree: &SyntaxNode, ctx: &mut RefsCtx) {
183 let mut footnote_def_nodes: HashMap<String, SyntaxNode> = HashMap::new();
184 for child in tree.descendants() {
185 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION
186 && let Some(label) = footnote_label(&child)
187 {
188 footnote_def_nodes.entry(label).or_insert(child);
189 }
190 }
191 let mut counter: i64 = 0;
192 for child in tree.children() {
193 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION {
194 continue;
195 }
196 visit_for_cite_nums(&child, &footnote_def_nodes, &mut counter, None, ctx);
197 }
198}
199
200fn visit_for_cite_nums(
201 node: &SyntaxNode,
202 fn_defs: &HashMap<String, SyntaxNode>,
203 counter: &mut i64,
204 in_fn: Option<i64>,
205 ctx: &mut RefsCtx,
206) {
207 for el in node.children_with_tokens() {
208 if let NodeOrToken::Node(n) = el {
209 match n.kind() {
210 SyntaxKind::CITATION => {
211 let offset: u32 = n.text_range().start().into();
212 let num = if let Some(fn_num) = in_fn {
213 fn_num
214 } else {
215 *counter += 1;
216 *counter
217 };
218 ctx.cite_note_num_by_offset.insert(offset, num);
219 }
220 SyntaxKind::FOOTNOTE_REFERENCE => {
221 if in_fn.is_none() {
222 *counter += 1;
223 let fn_num = *counter;
224 if let Some(label) = footnote_label(&n)
225 && let Some(def) = fn_defs.get(&label)
226 {
227 visit_for_cite_nums(def, fn_defs, counter, Some(fn_num), ctx);
228 }
229 }
230 }
231 _ => visit_for_cite_nums(&n, fn_defs, counter, in_fn, ctx),
232 }
233 }
234 }
235}
236
237fn collect_example_numbering(node: &SyntaxNode, ctx: &mut RefsCtx, counter: &mut usize) {
243 for child in node.children() {
244 if child.kind() == SyntaxKind::LIST && list_is_example(&child) {
245 let list_offset: u32 = child.text_range().start().into();
246 ctx.example_list_start_by_offset
247 .insert(list_offset, *counter + 1);
248 for item in child
249 .children()
250 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
251 {
252 *counter += 1;
253 if let Some(label) = example_item_label(&item) {
254 ctx.example_label_to_num.entry(label).or_insert(*counter);
255 }
256 }
257 collect_example_numbering(&child, ctx, counter);
260 } else {
261 collect_example_numbering(&child, ctx, counter);
262 }
263 }
264}
265
266fn list_is_example(list: &SyntaxNode) -> bool {
270 let Some(item) = list.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
271 return false;
272 };
273 let marker = list_item_marker_text(&item);
274 let trimmed = marker.trim();
275 let body = if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
276 inner
277 } else if let Some(inner) = trimmed.strip_suffix(')') {
278 inner
279 } else if let Some(inner) = trimmed.strip_suffix('.') {
280 inner
281 } else {
282 trimmed
283 };
284 body.starts_with('@')
285 && body[1..]
286 .chars()
287 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
288}
289
290fn list_item_marker_text(item: &SyntaxNode) -> String {
291 item.children_with_tokens()
292 .filter_map(|el| el.into_token())
293 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
294 .map(|t| t.text().to_string())
295 .unwrap_or_default()
296}
297
298fn example_item_label(item: &SyntaxNode) -> Option<String> {
301 let marker = list_item_marker_text(item);
302 let trimmed = marker.trim();
303 let body = trimmed
304 .strip_prefix('(')
305 .and_then(|s| s.strip_suffix(')'))
306 .or_else(|| trimmed.strip_suffix(')'))
307 .or_else(|| trimmed.strip_suffix('.'))
308 .unwrap_or(trimmed);
309 let label = body.strip_prefix('@')?;
310 if label.is_empty() {
311 None
312 } else {
313 Some(label.to_string())
314 }
315}
316
317fn collect_refs_and_headings(
318 node: &SyntaxNode,
319 ctx: &mut RefsCtx,
320 seen_ids: &mut HashMap<String, u32>,
321) {
322 for child in node.children() {
323 match child.kind() {
324 SyntaxKind::REFERENCE_DEFINITION => {
325 if let Some((label, url, title)) = parse_reference_def(&child) {
326 ctx.refs
327 .entry(normalize_ref_label(&label))
328 .or_insert((url, title));
329 }
330 }
331 SyntaxKind::FOOTNOTE_DEFINITION => {
332 if let Some((label, blocks)) = parse_footnote_def(&child) {
333 ctx.footnotes.entry(label).or_insert(blocks);
334 }
335 }
336 SyntaxKind::HEADING => {
337 let (id, was_explicit) = heading_id_with_explicitness(&child);
338 let final_id = if was_explicit {
339 seen_ids.entry(id.clone()).or_insert(0);
342 id
343 } else {
344 let mut base = id;
345 if base.is_empty() {
346 base = "section".to_string();
347 }
348 let count = seen_ids.entry(base.clone()).or_insert(0);
349 let id = if *count == 0 {
350 base
351 } else {
352 format!("{base}-{count}")
353 };
354 *count += 1;
355 id
356 };
357 if !final_id.is_empty() {
358 let offset: u32 = child.text_range().start().into();
359 ctx.heading_ids.insert(final_id.clone());
360 ctx.heading_id_by_offset.insert(offset, final_id);
361 }
362 collect_refs_and_headings(&child, ctx, seen_ids);
363 }
364 _ => collect_refs_and_headings(&child, ctx, seen_ids),
365 }
366 }
367}
368
369fn heading_id_with_explicitness(node: &SyntaxNode) -> (String, bool) {
373 let inlines = node
374 .children()
375 .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
376 .map(|c| coalesce_inlines(inlines_from(&c)))
377 .unwrap_or_default();
378 let parsed = extract_attr_from_node(node);
379 if !parsed.id.is_empty() {
380 return (parsed.id, true);
381 }
382 (pandoc_slugify(&inlines_to_plaintext(&inlines)), false)
383}
384
385fn parse_footnote_def(node: &SyntaxNode) -> Option<(String, Vec<Block>)> {
386 let label = footnote_label(node)?;
387 let mut blocks = Vec::new();
388 for child in node.children() {
389 if child.kind() == SyntaxKind::CODE_BLOCK
396 && !child
397 .children()
398 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
399 {
400 blocks.push(indented_code_block_with_extra_strip(&child, 4));
401 } else {
402 collect_block(&child, &mut blocks);
403 }
404 }
405 Some((label, blocks))
406}
407
408fn indented_code_block_with_extra_strip(node: &SyntaxNode, extra: usize) -> Block {
409 let raw_format = code_block_raw_format(node);
410 let attr = code_block_attr(node);
411 let is_fenced = node
412 .children()
413 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
414 let mut content = String::new();
415 for child in node.children() {
416 if child.kind() == SyntaxKind::CODE_CONTENT {
417 content.push_str(&child.text().to_string());
418 }
419 }
420 while content.ends_with('\n') {
421 content.pop();
422 }
423 content = content
428 .split('\n')
429 .map(expand_tabs_to_4)
430 .collect::<Vec<_>>()
431 .join("\n");
432 content = strip_leading_spaces_per_line(&content, extra);
433 if !is_fenced {
434 content = strip_indented_code_indent(&content);
435 }
436 if let Some(fmt) = raw_format {
437 return Block::RawBlock(fmt, content);
438 }
439 Block::CodeBlock(attr, content)
440}
441
/// Removes at most `n` leading space characters from every `\n`-separated
/// line of `s`. Tabs and other whitespace are left untouched.
fn strip_leading_spaces_per_line(s: &str, n: usize) -> String {
    let trimmed_lines: Vec<&str> = s
        .split('\n')
        .map(|line| {
            // A space is one byte in UTF-8, so the count is a valid byte index.
            let spaces = line.bytes().take(n).take_while(|&b| b == b' ').count();
            &line[spaces..]
        })
        .collect();
    trimmed_lines.join("\n")
}
453
454fn footnote_label(node: &SyntaxNode) -> Option<String> {
455 for el in node.children_with_tokens() {
456 if let NodeOrToken::Token(t) = el
457 && t.kind() == SyntaxKind::FOOTNOTE_LABEL_ID
458 {
459 return Some(t.text().to_string());
460 }
461 }
462 None
463}
464
465fn parse_reference_def(node: &SyntaxNode) -> Option<(String, String, String)> {
466 let link = node.children().find(|c| c.kind() == SyntaxKind::LINK)?;
467 let label_node = link
468 .children()
469 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)?;
470 let label = label_node.text().to_string();
471
472 let url_node = node
476 .children()
477 .find(|c| c.kind() == SyntaxKind::REFERENCE_URL)?;
478 let url_raw = url_node.text().to_string();
479 let url = url_raw
480 .strip_prefix('<')
481 .and_then(|r| r.strip_suffix('>'))
482 .unwrap_or(&url_raw)
483 .to_string();
484
485 let title = node
486 .children()
487 .find(|c| c.kind() == SyntaxKind::REFERENCE_TITLE)
488 .map(|t| parse_dest_title(&t.text().to_string()))
489 .unwrap_or_default();
490
491 Some((unescape_label(&label), url, title))
492}
493
494fn unescape_label(label: &str) -> String {
495 let mut out = String::with_capacity(label.len());
496 let mut chars = label.chars().peekable();
497 while let Some(ch) = chars.next() {
498 if ch == '\\'
499 && let Some(&next) = chars.peek()
500 && is_ascii_punct(next)
501 {
502 out.push(next);
503 chars.next();
504 } else {
505 out.push(ch);
506 }
507 }
508 out
509}
510
/// `true` for the ASCII punctuation characters: `!`–`/`, `:`–`@`, `[`–`` ` ``
/// and `{`–`~`.
fn is_ascii_punct(c: char) -> bool {
    matches!(c, '!'..='/' | ':'..='@' | '['..='`' | '{'..='~')
}
514
515fn normalize_ref_label(label: &str) -> String {
518 let unescaped = unescape_label(label);
519 let mut out = String::new();
520 let mut last_space = false;
521 for ch in unescaped.chars() {
522 if ch.is_whitespace() {
523 if !out.is_empty() && !last_space {
524 out.push(' ');
525 last_space = true;
526 }
527 } else {
528 for lc in ch.to_lowercase() {
529 out.push(lc);
530 }
531 last_space = false;
532 }
533 }
534 if last_space {
535 out.pop();
536 }
537 out
538}
539
540fn lookup_ref(label: &str) -> Option<(String, String)> {
541 let key = normalize_ref_label(label);
542 REFS_CTX.with(|c| c.borrow().refs.get(&key).cloned())
543}
544
545fn lookup_heading_id(label: &str) -> Option<String> {
546 let id = pandoc_slugify(&unescape_label(label));
547 if id.is_empty() {
548 return None;
549 }
550 REFS_CTX.with(|c| {
551 if c.borrow().heading_ids.contains(&id) {
552 Some(id)
553 } else {
554 None
555 }
556 })
557}
558
/// Re-tokenizes native-format text and joins the tokens with single spaces.
///
/// Tokens are: each of `[ ] ( ) ,` on its own; a double-quoted string
/// (a backslash consumes the following byte, an unterminated string runs to
/// the end of input); and any other maximal run of bytes that contains no
/// whitespace, bracket, comma or quote. Spaces, tabs, `\n` and `\r` between
/// tokens are discarded.
pub fn normalize_native(s: &str) -> String {
    fn is_blank(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }
    fn is_bracket_or_comma(b: u8) -> bool {
        matches!(b, b'[' | b']' | b'(' | b')' | b',')
    }

    let bytes = s.as_bytes();
    let mut pieces: Vec<&str> = Vec::new();
    let mut pos = 0usize;
    while let Some(&lead) = bytes.get(pos) {
        if is_blank(lead) {
            pos += 1;
            continue;
        }
        let begin = pos;
        if is_bracket_or_comma(lead) {
            pos += 1;
        } else if lead == b'"' {
            pos += 1;
            while let Some(&inner) = bytes.get(pos) {
                if inner == b'\\' && pos + 1 < bytes.len() {
                    // Skip the escaped byte so an escaped quote does not end the string.
                    pos += 2;
                } else {
                    pos += 1;
                    if inner == b'"' {
                        break;
                    }
                }
            }
        } else {
            pos += bytes[pos..]
                .iter()
                .take_while(|&&b| !(is_blank(b) || is_bracket_or_comma(b) || b == b'"'))
                .count();
        }
        pieces.push(&s[begin..pos]);
    }
    pieces.join(" ")
}
616
617#[derive(Debug, Clone)]
621#[allow(clippy::enum_variant_names)]
622enum Block {
623 Para(Vec<Inline>),
624 Plain(Vec<Inline>),
625 Header(usize, Attr, Vec<Inline>),
626 BlockQuote(Vec<Block>),
627 CodeBlock(Attr, String),
628 HorizontalRule,
629 BulletList(Vec<Vec<Block>>),
630 OrderedList(usize, &'static str, &'static str, Vec<Vec<Block>>),
631 RawBlock(String, String),
632 Table(TableData),
633 Div(Attr, Vec<Block>),
634 LineBlock(Vec<Vec<Inline>>),
635 DefinitionList(Vec<(Vec<Inline>, Vec<Vec<Block>>)>),
636 Figure(Attr, Vec<Block>, Vec<Block>),
641 Unsupported(String),
642}
643
644#[derive(Debug, Clone)]
645struct TableData {
646 attr: Attr,
650 caption: Vec<Inline>,
651 aligns: Vec<&'static str>,
652 widths: Vec<Option<f64>>,
654 head_rows: Vec<Vec<GridCell>>,
655 body_rows: Vec<Vec<GridCell>>,
656 foot_rows: Vec<Vec<GridCell>>,
659}
660
661#[derive(Debug, Clone)]
665struct GridCell {
666 row_span: u32,
667 col_span: u32,
668 blocks: Vec<Block>,
669}
670
671impl GridCell {
672 fn no_span(blocks: Vec<Block>) -> Self {
673 Self {
674 row_span: 1,
675 col_span: 1,
676 blocks,
677 }
678 }
679}
680
681#[derive(Debug, Clone)]
682#[allow(clippy::enum_variant_names)]
683enum Inline {
684 Str(String),
685 Space,
686 SoftBreak,
687 LineBreak,
688 Emph(Vec<Inline>),
689 Strong(Vec<Inline>),
690 Strikeout(Vec<Inline>),
691 Superscript(Vec<Inline>),
692 Subscript(Vec<Inline>),
693 Code(Attr, String),
694 Link(Attr, Vec<Inline>, String, String),
695 Image(Attr, Vec<Inline>, String, String),
696 Math(&'static str, String),
697 Span(Attr, Vec<Inline>),
698 RawInline(String, String),
699 Quoted(&'static str, Vec<Inline>),
700 Note(Vec<Block>),
701 Cite(Vec<Citation>, Vec<Inline>),
702 Unsupported(String),
703}
704
705#[derive(Debug, Clone)]
706struct Citation {
707 id: String,
708 prefix: Vec<Inline>,
709 suffix: Vec<Inline>,
710 mode: CitationMode,
711 note_num: i64,
712 hash: i64,
713}
714
/// Rendering mode of a [`Citation`]; variant names follow Pandoc's
/// `CitationMode` constructors.
#[derive(Debug, Clone, Copy)]
enum CitationMode {
    /// Author appears in the running text.
    AuthorInText,
    /// Regular citation.
    NormalCitation,
    /// Citation with the author suppressed.
    SuppressAuthor,
}
721
/// Element attributes: identifier, classes and key/value pairs. The default
/// value has an empty identifier and no classes or pairs.
#[derive(Debug, Default, Clone)]
struct Attr {
    /// Identifier; empty when absent.
    id: String,
    /// Class names in order.
    classes: Vec<String>,
    /// Key/value pairs in order.
    kvs: Vec<(String, String)>,
}
728
729fn blocks_from_doc(doc: &SyntaxNode) -> Vec<Block> {
732 let mut out = Vec::new();
733 for child in doc.children() {
734 collect_block(&child, &mut out);
735 }
736 out
737}
738
739fn block_from(node: &SyntaxNode) -> Option<Block> {
740 match node.kind() {
741 SyntaxKind::PARAGRAPH => Some(Block::Para(coalesce_inlines(inlines_from(node)))),
742 SyntaxKind::PLAIN => Some(Block::Plain(coalesce_inlines(inlines_from(node)))),
743 SyntaxKind::HEADING => Some(heading_block(node)),
744 SyntaxKind::BLOCK_QUOTE => Some(Block::BlockQuote(blockquote_blocks(node))),
745 SyntaxKind::CODE_BLOCK => Some(code_block(node)),
746 SyntaxKind::HORIZONTAL_RULE => Some(Block::HorizontalRule),
747 SyntaxKind::LIST => Some(list_block(node)),
748 SyntaxKind::BLANK_LINE => None,
749 SyntaxKind::REFERENCE_DEFINITION => None,
752 SyntaxKind::FOOTNOTE_DEFINITION => None,
755 SyntaxKind::YAML_METADATA => None,
758 SyntaxKind::PANDOC_TITLE_BLOCK => None,
761 SyntaxKind::HTML_BLOCK => Some(html_block(node)),
762 SyntaxKind::HTML_BLOCK_DIV => Some(html_div_block(node)),
763 SyntaxKind::PIPE_TABLE => pipe_table(node).map(Block::Table),
764 SyntaxKind::SIMPLE_TABLE => simple_table(node).map(Block::Table),
765 SyntaxKind::GRID_TABLE => grid_table(node).map(Block::Table),
766 SyntaxKind::MULTILINE_TABLE => multiline_table(node).map(Block::Table),
767 SyntaxKind::TEX_BLOCK => Some(tex_block(node)),
768 SyntaxKind::FENCED_DIV => Some(fenced_div(node)),
769 SyntaxKind::LINE_BLOCK => Some(line_block(node)),
770 SyntaxKind::DEFINITION_LIST => Some(definition_list(node)),
771 SyntaxKind::FIGURE => Some(figure_block(node)),
772 other => Some(Block::Unsupported(format!("{other:?}"))),
773 }
774}
775
776fn figure_block(node: &SyntaxNode) -> Block {
783 let mut alt: Vec<Inline> = Vec::new();
784 let mut image_inline: Option<Inline> = None;
785 if let Some(image) = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_LINK) {
786 let alt_node = image.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
787 if let Some(an) = alt_node {
788 alt = coalesce_inlines(inlines_from(&an));
789 }
790 let mut tmp = Vec::new();
791 render_image_inline(&image, &mut tmp);
792 if let Some(first) = tmp.into_iter().next() {
793 image_inline = Some(first);
794 }
795 }
796 let (figure_attr, image_inline) = match image_inline {
799 Some(Inline::Image(mut attr, alt_inlines, url, title)) if !attr.id.is_empty() => {
800 let fig_attr = Attr::with_id(std::mem::take(&mut attr.id));
801 (fig_attr, Some(Inline::Image(attr, alt_inlines, url, title)))
802 }
803 other => (Attr::default(), other),
804 };
805 let caption = if alt.is_empty() {
806 Vec::new()
807 } else {
808 vec![Block::Plain(alt)]
809 };
810 let body = match image_inline {
811 Some(img) => vec![Block::Plain(vec![img])],
812 None => Vec::new(),
813 };
814 Block::Figure(figure_attr, caption, body)
815}
816
817fn heading_block(node: &SyntaxNode) -> Block {
818 let level = heading_level(node);
819 let inlines = node
820 .children()
821 .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
822 .map(|c| coalesce_inlines(inlines_from(&c)))
823 .unwrap_or_default();
824 let offset: u32 = node.text_range().start().into();
828 let final_id = REFS_CTX
829 .with(|c| c.borrow().heading_id_by_offset.get(&offset).cloned())
830 .unwrap_or_default();
831 let mut attr = extract_attr_from_node(node);
832 if attr.id.is_empty() {
833 attr.id = final_id;
834 }
835 Block::Header(level, attr, inlines)
836}
837
838fn heading_level(node: &SyntaxNode) -> usize {
839 for child in node.children() {
840 if child.kind() == SyntaxKind::ATX_HEADING_MARKER {
841 for tok in child.children_with_tokens() {
842 if let Some(t) = tok.as_token()
843 && t.kind() == SyntaxKind::ATX_HEADING_MARKER
844 {
845 return t.text().chars().filter(|&c| c == '#').count();
846 }
847 }
848 }
849 }
850 for el in node.descendants_with_tokens() {
851 if let NodeOrToken::Token(t) = el
852 && t.kind() == SyntaxKind::SETEXT_HEADING_UNDERLINE
853 {
854 return if t.text().trim_start().starts_with('=') {
855 1
856 } else {
857 2
858 };
859 }
860 }
861 1
862}
863
864fn blockquote_blocks(node: &SyntaxNode) -> Vec<Block> {
865 let mut out = Vec::new();
866 for child in node.children() {
867 collect_block(&child, &mut out);
868 }
869 out
870}
871
872fn code_block(node: &SyntaxNode) -> Block {
873 let raw_format = code_block_raw_format(node);
874 let attr = code_block_attr(node);
875 let is_fenced = node
876 .children()
877 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
878 let mut content = String::new();
879 for child in node.children() {
880 if child.kind() == SyntaxKind::CODE_CONTENT {
881 content.push_str(&child.text().to_string());
882 }
883 }
884 while content.ends_with('\n') {
886 content.pop();
887 }
888 if is_fenced {
889 content = content
894 .split('\n')
895 .map(expand_tabs_to_4)
896 .collect::<Vec<_>>()
897 .join("\n");
898 } else {
899 content = strip_indented_code_indent(&content);
900 }
901 if let Some(fmt) = raw_format {
902 return Block::RawBlock(fmt, content);
903 }
904 Block::CodeBlock(attr, content)
905}
906
907fn code_block_raw_format(node: &SyntaxNode) -> Option<String> {
912 let open = node
913 .children()
914 .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)?;
915 let info = open
916 .children()
917 .find(|c| c.kind() == SyntaxKind::CODE_INFO)?;
918 let raw = info.text().to_string();
919 let trimmed = raw.trim();
920 let inner = trimmed
921 .strip_prefix('{')
922 .and_then(|s| s.strip_suffix('}'))?;
923 let inner = inner.trim();
924 let format = inner.strip_prefix('=')?.trim();
925 if format.is_empty() || format.contains(char::is_whitespace) {
926 return None;
927 }
928 Some(format.to_string())
929}
930
931fn code_block_attr(node: &SyntaxNode) -> Attr {
932 let Some(open) = node
933 .children()
934 .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
935 else {
936 return Attr::default();
937 };
938 let Some(info) = open.children().find(|c| c.kind() == SyntaxKind::CODE_INFO) else {
939 return Attr::default();
940 };
941 let has_bare_attrs = info.children_with_tokens().any(|el| {
947 matches!(
948 el.kind(),
949 SyntaxKind::ATTR_ID | SyntaxKind::ATTR_CLASS | SyntaxKind::ATTR_KEY_VALUE
950 )
951 });
952 let has_chunk_options = info
953 .children()
954 .any(|c| c.kind() == SyntaxKind::CHUNK_OPTIONS);
955 if has_bare_attrs && !has_chunk_options {
956 let mut attr = read_bare_attr_children(&info);
957 if let Some(lang) = info
959 .children_with_tokens()
960 .find(|el| el.kind() == SyntaxKind::CODE_LANGUAGE)
961 .and_then(|el| el.as_token().map(|t| t.text().to_string()))
962 {
963 attr.classes.insert(0, normalize_lang_id(&lang));
964 }
965 return attr;
966 }
967
968 let raw = info.text().to_string();
969 let trimmed = raw.trim();
970 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
971 return parse_attr_block(inner);
972 }
973 if let Some(brace) = trimmed.find('{')
976 && trimmed.ends_with('}')
977 {
978 let lang = trimmed[..brace].trim();
979 let attr_inner = &trimmed[brace + 1..trimmed.len() - 1];
980 let mut attr = parse_attr_block(attr_inner);
981 if !lang.is_empty() {
982 attr.classes.insert(0, normalize_lang_id(lang));
983 }
984 return attr;
985 }
986 if !trimmed.is_empty() {
987 return Attr {
988 id: String::new(),
989 classes: vec![normalize_lang_id(trimmed)],
990 kvs: Vec::new(),
991 };
992 }
993 Attr::default()
994}
995
/// Lowercases a language name (ASCII only) and maps `c++` to `cpp` and
/// `objective-c` to `objectivec`.
fn normalize_lang_id(lang: &str) -> String {
    let lowered = lang.to_ascii_lowercase();
    if lowered == "c++" {
        return "cpp".to_string();
    }
    if lowered == "objective-c" {
        return "objectivec".to_string();
    }
    lowered
}
1007
1008fn strip_indented_code_indent(s: &str) -> String {
1012 let mut out = String::with_capacity(s.len());
1013 for (i, line) in s.split('\n').enumerate() {
1014 if i > 0 {
1015 out.push('\n');
1016 }
1017 let expanded = expand_tabs_to_4(line);
1021 let stripped = if let Some(rest) = expanded.strip_prefix(" ") {
1022 rest.to_string()
1023 } else if let Some(rest) = expanded.strip_prefix('\t') {
1024 rest.to_string()
1025 } else {
1026 expanded
1030 };
1031 out.push_str(&stripped);
1032 }
1033 out
1034}
1035
/// Replaces every tab in `line` with spaces up to the next multiple-of-four
/// column. Columns are counted in characters from the start of `line`.
fn expand_tabs_to_4(line: &str) -> String {
    let mut expanded = String::with_capacity(line.len());
    let mut column = 0usize;
    for ch in line.chars() {
        if ch != '\t' {
            expanded.push(ch);
            column += 1;
            continue;
        }
        // A tab always advances by 1..=4 columns.
        let pad = 4 - column % 4;
        expanded.extend(std::iter::repeat(' ').take(pad));
        column += pad;
    }
    expanded
}
1056
1057fn html_block(node: &SyntaxNode) -> Block {
1063 let mut content = node.text().to_string();
1064 while content.ends_with('\n') {
1065 content.pop();
1066 }
1067 Block::RawBlock("html".to_string(), content)
1068}
1069
1070fn html_div_block(node: &SyntaxNode) -> Block {
1087 let attr = cst_div_open_tag_attr(node);
1088 if div_has_structural_inner(node) {
1089 let mut blocks = Vec::new();
1090 for child in node.children() {
1091 match child.kind() {
1092 SyntaxKind::HTML_BLOCK_TAG | SyntaxKind::BLANK_LINE => {}
1093 _ => collect_block(&child, &mut blocks),
1094 }
1095 }
1096 return Block::Div(attr, blocks);
1097 }
1098 debug_assert!(
1099 false,
1100 "HTML_BLOCK_DIV without structural inner shape — parser regression"
1101 );
1102 Block::Div(attr, Vec::new())
1103}
1104
1105fn collect_html_block_text_skip_bq_markers(node: &SyntaxNode) -> String {
1122 let mut out = String::new();
1123 let mut skip_next_ws = false;
1124 let mut at_line_start = true;
1125 walk_skip_bq_markers(node, &mut out, &mut skip_next_ws, &mut at_line_start);
1126 out
1127}
1128
1129fn walk_skip_bq_markers(
1130 node: &SyntaxNode,
1131 out: &mut String,
1132 skip_next_ws: &mut bool,
1133 at_line_start: &mut bool,
1134) {
1135 for child in node.children_with_tokens() {
1136 match child {
1137 NodeOrToken::Node(n) => walk_skip_bq_markers(&n, out, skip_next_ws, at_line_start),
1138 NodeOrToken::Token(t) => {
1139 if t.kind() == SyntaxKind::BLOCK_QUOTE_MARKER {
1140 *skip_next_ws = true;
1141 *at_line_start = false;
1142 continue;
1143 }
1144 if *skip_next_ws && t.kind() == SyntaxKind::WHITESPACE {
1145 *skip_next_ws = false;
1146 *at_line_start = false;
1147 continue;
1148 }
1149 if *at_line_start && t.kind() == SyntaxKind::WHITESPACE {
1150 *at_line_start = false;
1151 continue;
1152 }
1153 *skip_next_ws = false;
1154 let kind = t.kind();
1155 out.push_str(t.text());
1156 *at_line_start = kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE;
1157 }
1158 }
1159 }
1160}
1161
1162fn div_has_structural_inner(node: &SyntaxNode) -> bool {
1178 let mut tags = node
1179 .children()
1180 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG);
1181 let Some(open_tag) = tags.next() else {
1182 return false;
1183 };
1184 let close_tag = tags.next();
1189 if tags.next().is_some() {
1190 return false;
1191 }
1192 if !html_block_open_tag_is_clean(&open_tag) {
1193 return false;
1194 }
1195 if let Some(close_tag) = close_tag.as_ref()
1196 && !html_block_close_tag_is_clean(close_tag)
1197 {
1198 return false;
1199 }
1200 !node
1201 .children()
1202 .any(|c| c.kind() == SyntaxKind::HTML_BLOCK_CONTENT)
1203}
1204
1205fn html_block_open_tag_is_clean(open_tag: &SyntaxNode) -> bool {
1213 let mut seen_gt = false;
1214 for child in open_tag.children_with_tokens() {
1215 let NodeOrToken::Token(t) = child else {
1216 continue;
1219 };
1220 if !seen_gt {
1221 if t.kind() == SyntaxKind::TEXT && t.text().ends_with('>') {
1222 seen_gt = true;
1223 }
1224 } else if t.kind() != SyntaxKind::NEWLINE {
1225 return false;
1226 }
1227 }
1228 seen_gt
1229}
1230
1231fn html_block_close_tag_is_clean(close_tag: &SyntaxNode) -> bool {
1235 for child in close_tag.children_with_tokens() {
1236 if let NodeOrToken::Token(t) = child
1237 && t.kind() == SyntaxKind::TEXT
1238 {
1239 return t.text().starts_with("</");
1240 }
1241 }
1242 false
1243}
1244
1245fn cst_div_open_tag_attr(node: &SyntaxNode) -> Attr {
1253 let Some(open_tag) = node
1254 .children()
1255 .find(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG)
1256 else {
1257 return Attr::default();
1258 };
1259 let mut attr = Attr::default();
1263 for region in open_tag
1264 .children()
1265 .filter(|c| c.kind() == SyntaxKind::HTML_ATTRS)
1266 {
1267 let part = attr_from_html_attrs_node(®ion);
1268 if attr.id.is_empty() && !part.id.is_empty() {
1269 attr.id = part.id;
1270 }
1271 attr.classes.extend(part.classes);
1272 attr.kvs.extend(part.kvs);
1273 }
1274 attr
1275}
1276
1277fn emit_html_block(node: &SyntaxNode, out: &mut Vec<Block>) {
1293 if html_block_has_structural_lift(node) {
1301 emit_html_block_structural(node, out);
1302 return;
1303 }
1304 let mut content = collect_html_block_text_skip_bq_markers(node);
1310 while content
1315 .as_bytes()
1316 .last()
1317 .is_some_and(|b| matches!(b, b'\n' | b'\r' | b' ' | b'\t'))
1318 {
1319 content.pop();
1320 }
1321 let leading_ws = content
1322 .as_bytes()
1323 .iter()
1324 .position(|&b| b != b' ' && b != b'\t')
1325 .unwrap_or(content.len());
1326 let trimmed = &content[leading_ws..];
1327 let strip_first_line_indent = leading_ws > 0
1334 && leading_ws <= 3
1335 && content.as_bytes()[..leading_ws].iter().all(|&b| b == b' ');
1336 if trimmed.starts_with("<!--")
1337 || trimmed.starts_with("<?")
1338 || trimmed.starts_with("<![CDATA[")
1339 || trimmed.starts_with("<!")
1340 || is_raw_text_element_open(trimmed)
1341 {
1342 let raw = if strip_first_line_indent {
1343 content[leading_ws..].to_string()
1344 } else {
1345 content
1346 };
1347 out.push(Block::RawBlock("html".to_string(), raw));
1348 return;
1349 }
1350 let walker_input = if strip_first_line_indent {
1351 &content[leading_ws..]
1352 } else {
1353 content.as_str()
1354 };
1355 split_html_block_by_tags(walker_input, out);
1356}
1357
1358fn html_block_has_structural_lift(node: &SyntaxNode) -> bool {
1366 let mut tags = node
1367 .children()
1368 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG);
1369 let Some(open_tag) = tags.next() else {
1370 return false;
1371 };
1372 let Some(close_tag) = tags.next() else {
1373 return false;
1374 };
1375 if tags.next().is_some() {
1376 return false;
1377 }
1378 if !html_block_open_tag_is_clean(&open_tag) {
1379 return false;
1380 }
1381 if !html_block_close_tag_is_clean(&close_tag) {
1382 return false;
1383 }
1384 !node
1385 .children()
1386 .any(|c| c.kind() == SyntaxKind::HTML_BLOCK_CONTENT)
1387}
1388
1389fn emit_html_block_structural(node: &SyntaxNode, out: &mut Vec<Block>) {
1396 for child in node.children() {
1397 match child.kind() {
1398 SyntaxKind::HTML_BLOCK_TAG => {
1399 let text = open_tag_raw_block_text(&child);
1400 out.push(Block::RawBlock("html".to_string(), text));
1401 }
1402 SyntaxKind::BLANK_LINE => {}
1403 _ => collect_block(&child, out),
1404 }
1405 }
1406}
1407
1408fn open_tag_raw_block_text(tag: &SyntaxNode) -> String {
1418 let has_attrs = tag.children().any(|c| c.kind() == SyntaxKind::HTML_ATTRS);
1419 if has_attrs {
1420 let mut name_prefix: Option<String> = None;
1421 let mut attrs: Vec<String> = Vec::new();
1422 for child in tag.children_with_tokens() {
1423 match child {
1424 NodeOrToken::Token(t) if t.kind() == SyntaxKind::TEXT => {
1425 let text = t.text();
1426 if name_prefix.is_none() && text.starts_with('<') {
1427 if let Some(gt_idx) = text.find('>') {
1428 return text[..=gt_idx].to_string();
1432 }
1433 name_prefix = Some(text.to_string());
1434 }
1435 }
1436 NodeOrToken::Node(n) if n.kind() == SyntaxKind::HTML_ATTRS => {
1437 let attr_text = n.text().to_string();
1438 let trimmed = attr_text.trim();
1439 if !trimmed.is_empty() {
1440 attrs.push(trimmed.to_string());
1441 }
1442 }
1443 _ => {}
1444 }
1445 }
1446 let mut result = name_prefix.unwrap_or_default();
1447 for attr in &attrs {
1448 result.push(' ');
1449 result.push_str(attr);
1450 }
1451 result.push('>');
1452 return result;
1453 }
1454 let mut text = String::new();
1463 let mut skip_next_ws = false;
1464 for child in tag.children_with_tokens() {
1465 if let NodeOrToken::Token(t) = child {
1466 if t.kind() == SyntaxKind::BLOCK_QUOTE_MARKER {
1467 skip_next_ws = true;
1468 continue;
1469 }
1470 if skip_next_ws && t.kind() == SyntaxKind::WHITESPACE {
1471 skip_next_ws = false;
1472 continue;
1473 }
1474 if text.is_empty() && t.kind() == SyntaxKind::WHITESPACE {
1475 continue;
1476 }
1477 skip_next_ws = false;
1478 text.push_str(t.text());
1479 }
1480 }
1481 while text.ends_with('\n') {
1482 text.pop();
1483 }
1484 text
1485}
1486
/// Splits the raw text of an HTML block into blocks appended to `out`.
///
/// `content` is scanned byte by byte. Tags whose name is accepted by one of
/// the `is_pandoc_*_tag_name` predicates may be emitted verbatim as
/// `Block::RawBlock("html", ..)`; the text lying between emitted tags is
/// handed to `flush_html_block_text`, and whatever follows the last emitted
/// tag is handed to `flush_html_block_tail_text`.
fn split_html_block_by_tags(content: &str, out: &mut Vec<Block>) {
    use crate::parser::blocks::html_blocks::{
        is_pandoc_block_tag_name, is_pandoc_inline_block_tag_name, is_pandoc_void_block_tag_name,
    };
    use crate::parser::inlines::inline_html::{parse_close_tag, parse_open_tag};

    let bytes = content.as_bytes();
    // `i` is the scan cursor; `text_start` marks the start of text not yet flushed.
    let mut i = 0usize;
    let mut text_start = 0usize;
    // True once non-whitespace text, or a tag that was not emitted as a raw
    // block, has been seen since the last emitted raw block or blank line.
    let mut inline_pending = false;
    // Length of the current run of '\n' bytes; any other byte resets it.
    let mut consecutive_newlines = 0usize;
    while i < bytes.len() {
        let b = bytes[i];
        if b == b'\n' {
            consecutive_newlines += 1;
            if consecutive_newlines >= 2 {
                // Two newlines in a row end the pending inline context.
                inline_pending = false;
            }
            i += 1;
            continue;
        }
        consecutive_newlines = 0;
        if b != b'<' {
            if !b.is_ascii_whitespace() {
                inline_pending = true;
            }
            i += 1;
            continue;
        }
        let rest = &content[i..];
        let open_end = parse_open_tag(rest);
        let close_end = parse_close_tag(rest);
        // Prefer an open-tag parse; fall back to a close-tag parse. A '<' that
        // parses as neither is treated as ordinary inline text.
        let Some((tag_end, is_close)) = open_end
            .map(|n| (n, false))
            .or_else(|| close_end.map(|n| (n, true)))
        else {
            inline_pending = true;
            i += 1;
            continue;
        };
        let tag_text = &rest[..tag_end];
        let Some(name) = extract_html_tag_name(tag_text) else {
            inline_pending = true;
            i += 1;
            continue;
        };
        if is_pandoc_block_tag_name(name) {
            // Block-level tag: always emitted, regardless of `inline_pending`.
            if i > text_start {
                flush_html_block_text(&content[text_start..i], out);
            }
            out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
            i += tag_end;
            text_start = i;
            inline_pending = false;
            continue;
        }
        if is_pandoc_inline_block_tag_name(name) {
            if !inline_pending {
                // An opener with a matching closer whose interior does not
                // begin with a void block tag is emitted as three parts:
                // opener, flushed interior, closer. Scanning resumes after
                // the closer.
                if !is_close
                    && let Some((close_start, close_end)) =
                        find_matching_html_close_with_start(content, i, name)
                    && !interior_starts_with_void_block_tag(content, i + tag_end)
                {
                    if i > text_start {
                        flush_html_block_text(&content[text_start..i], out);
                    }
                    out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
                    let interior = &content[i + tag_end..close_start];
                    flush_html_block_text(interior, out);
                    let close_text = &content[close_start..close_end];
                    out.push(Block::RawBlock("html".to_string(), close_text.to_string()));
                    i = close_end;
                    text_start = i;
                    inline_pending = false;
                    continue;
                }
                // Otherwise only this tag is emitted as a raw block.
                if i > text_start {
                    flush_html_block_text(&content[text_start..i], out);
                }
                out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
                i += tag_end;
                text_start = i;
                inline_pending = false;
                continue;
            }
            // Inline content is pending: leave the tag inside the running text.
            inline_pending = true;
            i += tag_end;
            continue;
        }
        if is_pandoc_void_block_tag_name(name) {
            if !inline_pending {
                if i > text_start {
                    flush_html_block_text(&content[text_start..i], out);
                }
                out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
                i += tag_end;
                text_start = i;
                inline_pending = false;
                continue;
            }
            // Inline content is pending: leave the tag inside the running text.
            inline_pending = true;
            i += tag_end;
            continue;
        }
        // Any other tag stays part of the running text.
        inline_pending = true;
        i += tag_end;
    }
    if text_start < bytes.len() {
        // The remainder goes through the tail flush, which does no Para -> Plain demotion.
        flush_html_block_tail_text(&content[text_start..], out);
    }
}
1654
1655fn flush_html_block_text(text: &str, out: &mut Vec<Block>) {
1663 if text.trim().is_empty() {
1664 return;
1665 }
1666 let trailing_blank = trailing_newlines(text) >= 2;
1667 let mut blocks = parse_pandoc_blocks(text);
1668 if blocks.is_empty() {
1669 return;
1670 }
1671 if !trailing_blank
1672 && let Some(Block::Para(_)) = blocks.last()
1673 && let Some(Block::Para(inlines)) = blocks.pop()
1674 {
1675 blocks.push(Block::Plain(inlines));
1676 }
1677 out.extend(blocks);
1678}
1679
1680fn flush_html_block_tail_text(text: &str, out: &mut Vec<Block>) {
1684 if text.trim().is_empty() {
1685 return;
1686 }
1687 let blocks = parse_pandoc_blocks(text);
1688 out.extend(blocks);
1689}
1690
/// Counts the `'\n'` characters at the very end of `s`.
fn trailing_newlines(s: &str) -> usize {
    s.len() - s.trim_end_matches('\n').len()
}
1694
1695fn interior_starts_with_void_block_tag(content: &str, interior_start: usize) -> bool {
1704 use crate::parser::blocks::html_blocks::is_pandoc_void_block_tag_name;
1705 use crate::parser::inlines::inline_html::parse_open_tag;
1706
1707 let bytes = content.as_bytes();
1708 let mut i = interior_start;
1709 while i < bytes.len() && matches!(bytes[i], b'\n' | b' ' | b'\t') {
1710 i += 1;
1711 }
1712 if i >= bytes.len() || bytes[i] != b'<' {
1713 return false;
1714 }
1715 let rest = &content[i..];
1716 let Some(end) = parse_open_tag(rest) else {
1717 return false;
1718 };
1719 extract_html_tag_name(&rest[..end]).is_some_and(is_pandoc_void_block_tag_name)
1720}
1721
/// Returns the tag name from text starting with `<` or `</`.
///
/// The name is the run of ASCII alphanumerics and `-` that follows the opening
/// `<` (and an optional `/`). Returns `None` when `tag_text` does not start
/// with `<` or when that run is empty.
fn extract_html_tag_name(tag_text: &str) -> Option<&str> {
    let after_lt = tag_text.strip_prefix('<')?;
    let body = after_lt.strip_prefix('/').unwrap_or(after_lt);
    let name_len = body
        .bytes()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'-')
        .count();
    if name_len == 0 {
        None
    } else {
        Some(&body[..name_len])
    }
}
1740
1741fn find_matching_html_close_with_start(
1746 content: &str,
1747 start: usize,
1748 name: &str,
1749) -> Option<(usize, usize)> {
1750 use crate::parser::inlines::inline_html::{parse_close_tag, parse_open_tag};
1751
1752 let bytes = content.as_bytes();
1753 let opener_end = parse_open_tag(&content[start..])?;
1754 let mut i = start + opener_end;
1755 let mut depth = 1usize;
1756 while i < bytes.len() {
1757 if bytes[i] != b'<' {
1758 i += 1;
1759 continue;
1760 }
1761 let rest = &content[i..];
1762 if let Some(end) = parse_open_tag(rest) {
1763 let tag = &rest[..end];
1764 if extract_html_tag_name(tag).is_some_and(|n| n.eq_ignore_ascii_case(name)) {
1765 depth += 1;
1766 }
1767 i += end;
1768 continue;
1769 }
1770 if let Some(end) = parse_close_tag(rest) {
1771 let tag = &rest[..end];
1772 if extract_html_tag_name(tag).is_some_and(|n| n.eq_ignore_ascii_case(name)) {
1773 depth -= 1;
1774 if depth == 0 {
1775 return Some((i, i + end));
1776 }
1777 }
1778 i += end;
1779 continue;
1780 }
1781 i += 1;
1782 }
1783 None
1784}
1785
/// Reports whether `s` begins with an opening `<script`, `<style`, `<pre` or
/// `<textarea` tag (ASCII case-insensitive).
///
/// The tag name must be followed by end of input, a space, tab, newline, `>`
/// or `/`, so `<prefix>` is not treated as `<pre>`.
///
/// The comparison is done on bytes: slicing the `str` at `tag.len()` would
/// panic when that offset falls inside a multi-byte character (for example
/// `"<pré>"`).
fn is_raw_text_element_open(s: &str) -> bool {
    let Some(rest) = s.strip_prefix('<') else {
        return false;
    };
    let rest = rest.as_bytes();
    ["script", "style", "pre", "textarea"].iter().any(|tag| {
        let name = tag.as_bytes();
        rest.len() >= name.len()
            && rest[..name.len()].eq_ignore_ascii_case(name)
            && matches!(
                rest.get(name.len()).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'>' | b'/')
            )
    })
}
1813
1814fn collect_block(node: &SyntaxNode, out: &mut Vec<Block>) {
1818 if node.kind() == SyntaxKind::HTML_BLOCK_DIV {
1819 out.push(html_div_block(node));
1823 return;
1824 }
1825 if node.kind() == SyntaxKind::HTML_BLOCK {
1826 emit_html_block(node, out);
1831 return;
1832 }
1833 if let Some(b) = block_from(node) {
1834 out.push(b);
1835 }
1836}
1837
1838fn parse_pandoc_blocks(text: &str) -> Vec<Block> {
1842 if text.trim().is_empty() {
1843 return Vec::new();
1844 }
1845 let opts = crate::ParserOptions {
1846 flavor: crate::Flavor::Pandoc,
1847 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
1848 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
1849 ..crate::ParserOptions::default()
1850 };
1851 let doc = crate::parse(text, Some(opts));
1852 let outer = REFS_CTX.with(|c| std::mem::take(&mut *c.borrow_mut()));
1863 let inner_ctx = build_refs_ctx_inherited(&doc, Some(&outer));
1864 REFS_CTX.with(|c| *c.borrow_mut() = inner_ctx);
1865 let mut out = Vec::new();
1866 for child in doc.children() {
1867 collect_block(&child, &mut out);
1868 }
1869 REFS_CTX.with(|c| *c.borrow_mut() = outer);
1870 out
1871}
1872
1873fn tex_block(node: &SyntaxNode) -> Block {
1874 let mut content = node.text().to_string();
1875 while content.ends_with('\n') {
1876 content.pop();
1877 }
1878 Block::RawBlock("tex".to_string(), content)
1879}
1880
1881fn fenced_div(node: &SyntaxNode) -> Block {
1882 let attr = node
1883 .children()
1884 .find(|c| c.kind() == SyntaxKind::DIV_FENCE_OPEN)
1885 .and_then(|open| open.children().find(|c| c.kind() == SyntaxKind::DIV_INFO))
1886 .map(|info| {
1887 if attr_node_is_structured(&info) {
1890 attr_from_attribute_node(&info)
1891 } else {
1892 parse_div_info(info.text().to_string().trim())
1893 }
1894 })
1895 .unwrap_or_default();
1896 let mut blocks = Vec::new();
1897 for child in node.children() {
1898 match child.kind() {
1899 SyntaxKind::DIV_FENCE_OPEN | SyntaxKind::DIV_FENCE_CLOSE => {}
1900 _ => collect_block(&child, &mut blocks),
1901 }
1902 }
1903 Block::Div(attr, blocks)
1904}
1905
1906fn parse_div_info(info: &str) -> Attr {
1909 if info.starts_with('{') && info.ends_with('}') {
1910 return parse_attr_block(&info[1..info.len() - 1]);
1911 }
1912 if !info.is_empty() {
1913 return Attr {
1914 id: String::new(),
1915 classes: vec![info.to_string()],
1916 kvs: Vec::new(),
1917 };
1918 }
1919 Attr::default()
1920}
1921
1922fn attr_node_is_structured(node: &SyntaxNode) -> bool {
1927 node.children_with_tokens().any(|el| {
1928 matches!(
1929 el.kind(),
1930 SyntaxKind::ATTR_ID | SyntaxKind::ATTR_CLASS | SyntaxKind::ATTR_KEY_VALUE
1931 )
1932 })
1933}
1934
1935fn attr_from_attribute_node(attr_node: &SyntaxNode) -> Attr {
1941 if !attr_node_is_structured(attr_node) {
1942 let raw = attr_node.text().to_string();
1943 return raw
1944 .trim()
1945 .strip_prefix('{')
1946 .and_then(|s| s.strip_suffix('}'))
1947 .map(parse_attr_block)
1948 .unwrap_or_default();
1949 }
1950
1951 read_bare_attr_children(attr_node)
1952}
1953
1954fn read_bare_attr_children(node: &SyntaxNode) -> Attr {
1961 let mut attr = Attr::default();
1962 for el in node.children_with_tokens() {
1963 match el.kind() {
1964 SyntaxKind::ATTR_ID => {
1965 if let Some(t) = el.as_token() {
1966 attr.id = t.text().strip_prefix('#').unwrap_or(t.text()).to_string();
1967 }
1968 }
1969 SyntaxKind::ATTR_CLASS => {
1970 if let Some(c) = el.as_token().and_then(|t| t.text().strip_prefix('.')) {
1973 attr.classes.push(c.to_string());
1974 }
1975 }
1976 SyntaxKind::ATTR_KEY_VALUE => {
1977 if let Some(kv) = el.as_node() {
1978 let key = attr_kv_child_text(kv, SyntaxKind::ATTR_KEY);
1979 if !key.is_empty() {
1980 let value = strip_attr_value_quotes(&attr_kv_child_text(
1981 kv,
1982 SyntaxKind::ATTR_VALUE,
1983 ));
1984 attr.kvs.push((key, value));
1985 }
1986 }
1987 }
1988 _ => {}
1989 }
1990 }
1991 attr
1992}
1993
1994fn attr_kv_child_text(kv: &SyntaxNode, kind: SyntaxKind) -> String {
1996 kv.children_with_tokens()
1997 .find(|el| el.kind() == kind)
1998 .and_then(|el| el.as_token().map(|t| t.text().to_string()))
1999 .unwrap_or_default()
2000}
2001
/// Removes one pair of surrounding double quotes from `raw`, if present.
///
/// Anything else, including a lone `"`, is returned unchanged.
fn strip_attr_value_quotes(raw: &str) -> String {
    match raw.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) {
        Some(inner) => inner.to_string(),
        None => raw.to_string(),
    }
}
2011
/// Removes one pair of matching surrounding quotes (`"` or `'`) from `raw`.
///
/// Mismatched or missing quotes leave the input unchanged.
fn strip_any_quotes(raw: &str) -> String {
    for quote in ['"', '\''] {
        if let Some(inner) = raw.strip_prefix(quote).and_then(|r| r.strip_suffix(quote)) {
            return inner.to_string();
        }
    }
    raw.to_string()
}
2024
2025fn attr_from_html_attrs_node(node: &SyntaxNode) -> Attr {
2031 let mut attr = Attr::default();
2032 for el in node.children_with_tokens() {
2033 match el.kind() {
2034 SyntaxKind::ATTR_ID => {
2035 if attr.id.is_empty()
2036 && let Some(t) = el.as_token()
2037 {
2038 attr.id = t.text().to_string();
2039 }
2040 }
2041 SyntaxKind::ATTR_CLASS => {
2042 if let Some(t) = el.as_token() {
2043 attr.classes.push(t.text().to_string());
2044 }
2045 }
2046 SyntaxKind::ATTR_KEY_VALUE => {
2047 if let Some(kv) = el.as_node() {
2048 let key = attr_kv_child_text(kv, SyntaxKind::ATTR_KEY);
2049 if !key.is_empty() {
2050 let value =
2051 strip_any_quotes(&attr_kv_child_text(kv, SyntaxKind::ATTR_VALUE));
2052 attr.kvs.push((key, value));
2053 }
2054 }
2055 }
2056 _ => {}
2057 }
2058 }
2059 attr
2060}
2061
2062fn extract_attr_from_node(parent: &SyntaxNode) -> Attr {
2066 parent
2067 .children_with_tokens()
2068 .find(|el| el.kind() == SyntaxKind::ATTRIBUTE)
2069 .map(|el| match el {
2070 NodeOrToken::Node(n) => attr_from_attribute_node(&n),
2071 NodeOrToken::Token(t) => t
2072 .text()
2073 .trim()
2074 .strip_prefix('{')
2075 .and_then(|s| s.strip_suffix('}'))
2076 .map(parse_attr_block)
2077 .unwrap_or_default(),
2078 })
2079 .unwrap_or_default()
2080}
2081
2082fn parse_attr_block(s: &str) -> Attr {
2086 let mut id = String::new();
2087 let mut classes: Vec<String> = Vec::new();
2088 let mut kvs: Vec<(String, String)> = Vec::new();
2089 let bytes = s.as_bytes();
2090 let mut i = 0usize;
2091 while i < bytes.len() {
2092 match bytes[i] {
2093 b' ' | b'\t' | b'\n' | b'\r' => {
2094 i += 1;
2095 }
2096 b'#' => {
2097 let start = i + 1;
2098 let mut j = start;
2099 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
2100 j += 1;
2101 }
2102 id = s[start..j].to_string();
2103 i = j;
2104 }
2105 b'.' => {
2106 let start = i + 1;
2107 let mut j = start;
2108 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
2109 j += 1;
2110 }
2111 classes.push(s[start..j].to_string());
2112 i = j;
2113 }
2114 _ => {
2115 let key_start = i;
2117 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
2118 i += 1;
2119 }
2120 let key = s[key_start..i].to_string();
2121 if i < bytes.len() && bytes[i] == b'=' {
2122 i += 1;
2123 let value = if i < bytes.len() && bytes[i] == b'"' {
2124 i += 1;
2125 let v_start = i;
2126 while i < bytes.len() && bytes[i] != b'"' {
2127 i += 1;
2128 }
2129 let v = s[v_start..i].to_string();
2130 if i < bytes.len() {
2131 i += 1;
2132 }
2133 v
2134 } else {
2135 let v_start = i;
2136 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
2137 i += 1;
2138 }
2139 s[v_start..i].to_string()
2140 };
2141 kvs.push((key, value));
2142 } else if !key.is_empty() {
2143 classes.push(key);
2145 }
2146 }
2147 }
2148 }
2149 Attr { id, classes, kvs }
2150}
2151
2152fn definition_list(node: &SyntaxNode) -> Block {
2153 let items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = node
2154 .children()
2155 .filter(|c| c.kind() == SyntaxKind::DEFINITION_ITEM)
2156 .map(|item| {
2157 let term = item
2158 .children()
2159 .find(|c| c.kind() == SyntaxKind::TERM)
2160 .map(|t| coalesce_inlines(inlines_from(&t)))
2161 .unwrap_or_default();
2162 let loose = is_loose_definition_item(&item);
2163 let defs: Vec<Vec<Block>> = item
2164 .children()
2165 .filter(|c| c.kind() == SyntaxKind::DEFINITION)
2166 .map(|d| definition_blocks(&d, loose))
2167 .collect();
2168 (term, defs)
2169 })
2170 .collect();
2171 Block::DefinitionList(items)
2172}
2173
2174fn is_loose_definition_item(item: &SyntaxNode) -> bool {
2180 let mut saw_term = false;
2181 for child in item.children_with_tokens() {
2182 if let NodeOrToken::Node(n) = child {
2183 match n.kind() {
2184 SyntaxKind::TERM => {
2185 saw_term = true;
2186 }
2187 SyntaxKind::BLANK_LINE if saw_term => {
2188 return true;
2189 }
2190 SyntaxKind::DEFINITION => {
2191 return false;
2192 }
2193 _ => {}
2194 }
2195 }
2196 }
2197 false
2198}
2199
2200fn definition_blocks(def_node: &SyntaxNode, loose: bool) -> Vec<Block> {
2201 let extra = definition_content_offset(def_node);
2206 let mut out = Vec::new();
2207 for child in def_node.children() {
2208 match child.kind() {
2209 SyntaxKind::PLAIN => {
2210 let inlines = coalesce_inlines(inlines_from(&child));
2211 if loose {
2212 out.push(Block::Para(inlines));
2213 } else {
2214 out.push(Block::Plain(inlines));
2215 }
2216 }
2217 SyntaxKind::PARAGRAPH => {
2218 out.push(Block::Para(coalesce_inlines(inlines_from(&child))));
2219 }
2220 SyntaxKind::CODE_BLOCK if extra > 0 => {
2221 out.push(indented_code_block_with_extra_strip(&child, extra));
2222 }
2223 _ => collect_block(&child, &mut out),
2224 }
2225 }
2226 out
2227}
2228
2229fn definition_content_offset(def_node: &SyntaxNode) -> usize {
2234 let mut col = 0usize;
2235 let mut saw_marker = false;
2236 for el in def_node.children_with_tokens() {
2237 if let NodeOrToken::Token(t) = el {
2238 match t.kind() {
2239 SyntaxKind::DEFINITION_MARKER => {
2240 col = advance_col(col, t.text());
2241 saw_marker = true;
2242 }
2243 SyntaxKind::WHITESPACE if saw_marker => {
2244 return advance_col(col, t.text());
2245 }
2246 _ if saw_marker => return col,
2247 _ => {}
2248 }
2249 } else if saw_marker {
2250 return col;
2251 }
2252 }
2253 col
2254}
2255
/// Advances the column `start` across the characters of `s`.
///
/// A tab jumps to the next multiple of four; every other character advances
/// the column by one.
fn advance_col(start: usize, s: &str) -> usize {
    s.chars().fold(start, |col, ch| match ch {
        '\t' => (col / 4 + 1) * 4,
        _ => col + 1,
    })
}
2269
2270fn line_block(node: &SyntaxNode) -> Block {
2271 let lines: Vec<Vec<Inline>> = node
2272 .children()
2273 .filter(|c| c.kind() == SyntaxKind::LINE_BLOCK_LINE)
2274 .map(|line| {
2275 let mut out = Vec::new();
2276 for el in line.children_with_tokens() {
2277 match el {
2278 NodeOrToken::Token(t) => match t.kind() {
2279 SyntaxKind::LINE_BLOCK_MARKER | SyntaxKind::NEWLINE => {}
2280 _ => push_token_inline(&t, &mut out),
2281 },
2282 NodeOrToken::Node(n) => out.push(inline_from_node(&n)),
2283 }
2284 }
2285 coalesce_inlines(out)
2286 })
2287 .collect();
2288 Block::LineBlock(lines)
2289}
2290
2291fn latex_command_inline(node: &SyntaxNode) -> Inline {
2292 let content = node.text().to_string();
2293 Inline::RawInline("tex".to_string(), content)
2294}
2295
2296fn bracketed_span_inline(node: &SyntaxNode) -> Inline {
2297 let is_html = node
2298 .children_with_tokens()
2299 .any(|el| matches!(&el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::SPAN_BRACKET_OPEN && t.text().starts_with('<')));
2300 let attr = node
2301 .children()
2302 .find(|n| n.kind() == SyntaxKind::SPAN_ATTRIBUTES)
2303 .map(|n| {
2304 if is_html {
2305 attr_from_html_attrs_node(&n)
2308 } else {
2309 attr_from_attribute_node(&n)
2313 }
2314 })
2315 .unwrap_or_default();
2316 let content = node
2317 .children()
2318 .find(|c| c.kind() == SyntaxKind::SPAN_CONTENT)
2319 .map(|n| coalesce_inlines(inlines_from(&n)))
2320 .unwrap_or_default();
2321 Inline::Span(attr, content)
2322}
2323
2324fn inline_html_span_inline(node: &SyntaxNode) -> Inline {
2325 let attr = node
2326 .children()
2327 .find(|c| c.kind() == SyntaxKind::HTML_ATTRS)
2328 .map(|n| attr_from_html_attrs_node(&n))
2329 .unwrap_or_default();
2330 let content = node
2331 .children()
2332 .find(|c| c.kind() == SyntaxKind::SPAN_CONTENT)
2333 .map(|n| coalesce_inlines(inlines_from(&n)))
2334 .unwrap_or_default();
2335 Inline::Span(attr, content)
2336}
2337
2338fn pipe_table(node: &SyntaxNode) -> Option<TableData> {
2339 let mut header_cells: Vec<Vec<Inline>> = Vec::new();
2340 let mut body_rows: Vec<Vec<Vec<Inline>>> = Vec::new();
2341 let mut aligns: Vec<&'static str> = Vec::new();
2342 let mut caption_inlines: Vec<Inline> = Vec::new();
2343 let mut caption_attr_from_node: Option<Attr> = None;
2344 for child in node.children() {
2345 match child.kind() {
2346 SyntaxKind::TABLE_HEADER => {
2347 header_cells = pipe_table_cells(&child);
2348 }
2349 SyntaxKind::TABLE_SEPARATOR => {
2350 let raw = child.text().to_string();
2351 aligns = pipe_separator_aligns(&raw);
2352 }
2353 SyntaxKind::TABLE_ROW => {
2354 body_rows.push(pipe_table_cells(&child));
2355 }
2356 SyntaxKind::TABLE_CAPTION => {
2357 let (inlines, attr) = pipe_table_caption(&child);
2358 caption_inlines = inlines;
2359 caption_attr_from_node = attr;
2360 }
2361 _ => {}
2362 }
2363 }
2364 let cols = header_cells
2365 .len()
2366 .max(body_rows.iter().map(Vec::len).max().unwrap_or(0))
2367 .max(aligns.len());
2368 if cols == 0 {
2369 return None;
2370 }
2371 while aligns.len() < cols {
2372 aligns.push("AlignDefault");
2373 }
2374 let head_rows = if header_cells.is_empty() {
2375 Vec::new()
2376 } else {
2377 vec![cells_to_plain_blocks(header_cells, cols)]
2378 };
2379 let body_rows: Vec<Vec<GridCell>> = body_rows
2380 .into_iter()
2381 .map(|cells| cells_to_plain_blocks(cells, cols))
2382 .collect();
2383 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
2384 Some(TableData {
2385 attr,
2386 caption: caption_inlines,
2387 aligns,
2388 widths: vec![None; cols],
2389 head_rows,
2390 body_rows,
2391 foot_rows: Vec::new(),
2392 })
2393}
2394
2395fn pipe_table_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
2396 row.children()
2397 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2398 .map(|cell| coalesce_inlines(inlines_from(&cell)))
2399 .collect()
2400}
2401
2402fn extract_caption_attrs(mut inlines: Vec<Inline>) -> (Attr, Vec<Inline>) {
2410 let last_str_end = inlines
2411 .iter()
2412 .rposition(|i| matches!(i, Inline::Str(s) if s.ends_with('}')));
2413 let Some(end_idx) = last_str_end else {
2414 return (Attr::default(), inlines);
2415 };
2416 let mut start_idx = end_idx;
2420 let mut found_open = false;
2421 loop {
2422 match &inlines[start_idx] {
2423 Inline::Str(s) => {
2424 if s.starts_with('{') {
2425 found_open = true;
2426 break;
2427 }
2428 }
2429 Inline::Space => {}
2430 _ => return (Attr::default(), inlines),
2431 }
2432 if start_idx == 0 {
2433 break;
2434 }
2435 start_idx -= 1;
2436 }
2437 if !found_open {
2438 return (Attr::default(), inlines);
2439 }
2440 let mut raw = String::new();
2443 for el in &inlines[start_idx..=end_idx] {
2444 match el {
2445 Inline::Str(s) => raw.push_str(s),
2446 Inline::Space => raw.push(' '),
2447 _ => return (Attr::default(), inlines),
2448 }
2449 }
2450 if !(raw.starts_with('{') && raw.ends_with('}')) {
2451 return (Attr::default(), inlines);
2452 }
2453 let inner = &raw[1..raw.len() - 1];
2454 let attr = parse_attr_block(inner);
2455 inlines.truncate(start_idx);
2456 if matches!(inlines.last(), Some(Inline::Space)) {
2457 inlines.pop();
2458 }
2459 (attr, inlines)
2460}
2461
2462fn resolve_caption_attr(
2467 caption_inlines: Vec<Inline>,
2468 caption_attr_from_node: Option<Attr>,
2469) -> (Attr, Vec<Inline>) {
2470 match caption_attr_from_node {
2471 Some(attr) => (attr, caption_inlines),
2472 None => extract_caption_attrs(caption_inlines),
2473 }
2474}
2475
2476fn project_table_caption_from(node: &SyntaxNode) -> (Vec<Inline>, Option<Attr>) {
2480 node.children()
2481 .find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
2482 .map(|n| pipe_table_caption(&n))
2483 .unwrap_or_else(|| (Vec::new(), None))
2484}
2485
2486fn pipe_table_caption(node: &SyntaxNode) -> (Vec<Inline>, Option<Attr>) {
2487 let mut out = Vec::new();
2492 let mut caption_attr: Option<Attr> = None;
2493 let mut after_prefix = false;
2494 for el in node.children_with_tokens() {
2495 match el {
2496 NodeOrToken::Node(n) => {
2497 if n.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
2498 after_prefix = true;
2499 continue;
2500 }
2501 if !after_prefix {
2502 continue;
2503 }
2504 if n.kind() == SyntaxKind::ATTRIBUTE {
2505 caption_attr = Some(attr_from_attribute_node(&n));
2506 if matches!(out.last(), Some(Inline::Space)) {
2508 out.pop();
2509 }
2510 continue;
2511 }
2512 out.push(inline_from_node(&n));
2513 }
2514 NodeOrToken::Token(t) => {
2515 if t.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
2516 after_prefix = true;
2517 continue;
2518 }
2519 if !after_prefix {
2520 continue;
2521 }
2522 if t.kind() == SyntaxKind::ATTRIBUTE {
2523 let raw = t.text();
2524 let inner = raw.trim().trim_start_matches('{').trim_end_matches('}');
2525 caption_attr = Some(parse_attr_block(inner));
2526 if matches!(out.last(), Some(Inline::Space)) {
2527 out.pop();
2528 }
2529 continue;
2530 }
2531 push_token_inline(&t, &mut out);
2532 }
2533 }
2534 }
2535 (coalesce_inlines(out), caption_attr)
2536}
2537
/// Derives one Pandoc alignment name per column from a pipe-table separator
/// line such as `|:---|:--:|---:|`.
///
/// Leading and trailing pipes are removed and the rest is split into column
/// segments. Colons on a segment choose the alignment: both sides give
/// `"AlignCenter"`, left only `"AlignLeft"`, right only `"AlignRight"`, none
/// `"AlignDefault"`.
///
/// Columns may be separated by `|` or by `+`: Pandoc also accepts the orgtbl
/// form `|-----+-----|` for the separator line, so `+` must split columns too.
fn pipe_separator_aligns(raw: &str) -> Vec<&'static str> {
    let inner = raw
        .trim()
        .trim_start_matches('|')
        .trim_end_matches('|');
    inner
        .split(|c: char| c == '|' || c == '+')
        .map(|segment| {
            let cell = segment.trim();
            match (cell.starts_with(':'), cell.ends_with(':')) {
                (true, true) => "AlignCenter",
                (true, false) => "AlignLeft",
                (false, true) => "AlignRight",
                (false, false) => "AlignDefault",
            }
        })
        .collect()
}
2560
2561fn cells_to_plain_blocks(cells: Vec<Vec<Inline>>, cols: usize) -> Vec<GridCell> {
2562 let mut out: Vec<GridCell> = cells
2563 .into_iter()
2564 .map(|inlines| {
2565 let blocks = if inlines.is_empty() {
2566 Vec::new()
2567 } else {
2568 vec![Block::Plain(inlines)]
2569 };
2570 GridCell::no_span(blocks)
2571 })
2572 .collect();
2573 while out.len() < cols {
2574 out.push(GridCell::no_span(Vec::new()));
2575 }
2576 out
2577}
2578
/// Formats `x` so that it always carries a fractional part.
///
/// Zero is `"0.0"`. Magnitudes in `[0.1, 1e7)` use plain decimal notation with
/// `.0` appended when no fraction is printed. All other values use exponent
/// notation, with `.0` inserted into a mantissa that has no fraction
/// (`1e7` becomes `"1.0e7"`).
fn show_double(x: f64) -> String {
    if x == 0.0 {
        return "0.0".to_string();
    }
    let magnitude = x.abs();
    if (0.1..1e7).contains(&magnitude) {
        let mut fixed = x.to_string();
        if !fixed.contains('.') && !fixed.contains('e') {
            fixed.push_str(".0");
        }
        return fixed;
    }
    let sci = format!("{:e}", x);
    // Only mantissas without a '.' need patching; everything else is kept as is.
    let patched = sci
        .split_once('e')
        .filter(|(mantissa, _)| !mantissa.contains('.'))
        .map(|(mantissa, exponent)| format!("{}.0e{}", mantissa, exponent));
    patched.unwrap_or(sci)
}
2609
2610fn simple_table(node: &SyntaxNode) -> Option<TableData> {
2627 let separator = node
2628 .children()
2629 .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)?;
2630 let cols = simple_table_dash_runs(&separator);
2631 if cols.is_empty() {
2632 return None;
2633 }
2634 let header = node
2635 .children()
2636 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
2637 let mut body_rows_nodes: Vec<SyntaxNode> = node
2641 .children()
2642 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
2643 .collect();
2644 if header.is_none()
2645 && body_rows_nodes
2646 .last()
2647 .map(simple_table_row_is_all_dashes)
2648 .unwrap_or(false)
2649 {
2650 body_rows_nodes.pop();
2651 }
2652 let aligns = if let Some(h) = &header {
2654 simple_table_aligns(h, &cols)
2655 } else if let Some(r0) = body_rows_nodes.first() {
2656 simple_table_aligns(r0, &cols)
2657 } else {
2658 vec!["AlignDefault"; cols.len()]
2659 };
2660 let head_rows = match &header {
2661 Some(h) => {
2662 let cells: Vec<Vec<Inline>> = simple_table_row_cells(h);
2663 vec![cells_to_plain_blocks(cells, cols.len())]
2664 }
2665 None => Vec::new(),
2666 };
2667 let body_rows: Vec<Vec<GridCell>> = body_rows_nodes
2668 .iter()
2669 .map(|r| cells_to_plain_blocks(simple_table_row_cells(r), cols.len()))
2670 .collect();
2671 let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
2672 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
2673 Some(TableData {
2674 attr,
2675 caption: caption_inlines,
2676 aligns,
2677 widths: vec![None; cols.len()],
2678 head_rows,
2679 body_rows,
2680 foot_rows: Vec::new(),
2681 })
2682}
2683
2684fn simple_table_dash_runs(separator: &SyntaxNode) -> Vec<(usize, usize)> {
2688 let raw = separator.text().to_string();
2689 let line = raw.trim_end_matches(['\n', '\r']);
2690 let mut runs = Vec::new();
2691 let mut start: Option<usize> = None;
2692 for (i, ch) in line.char_indices() {
2693 if ch == '-' {
2694 if start.is_none() {
2695 start = Some(i);
2696 }
2697 } else if let Some(s) = start.take() {
2698 runs.push((s, i - 1));
2699 }
2700 }
2701 if let Some(s) = start.take() {
2702 runs.push((s, line.len() - 1));
2703 }
2704 runs
2705}
2706
2707fn simple_table_row_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
2708 row.children()
2713 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2714 .map(|cell| coalesce_inlines(inlines_from(&cell)))
2715 .collect()
2716}
2717
2718fn simple_table_row_is_all_dashes(row: &SyntaxNode) -> bool {
2719 let mut had_cell = false;
2720 for cell in row
2721 .children()
2722 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2723 {
2724 let text = cell.text().to_string();
2725 let trimmed = text.trim();
2726 if trimmed.is_empty() {
2727 continue;
2728 }
2729 had_cell = true;
2730 if !trimmed.chars().all(|c| c == '-') {
2731 return false;
2732 }
2733 }
2734 had_cell
2735}
2736
2737fn simple_table_aligns(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<&'static str> {
2745 let row_start: u32 = row.text_range().start().into();
2746 let mut cell_ranges: Vec<(usize, usize)> = Vec::new();
2747 for cell in row
2748 .children()
2749 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2750 {
2751 if cell.text_range().is_empty() {
2752 continue;
2753 }
2754 let text = cell.text().to_string();
2755 let lstrip = text.chars().take_while(|c| *c == ' ' || *c == '\t').count();
2756 let rstrip = text
2757 .chars()
2758 .rev()
2759 .take_while(|c| *c == ' ' || *c == '\t')
2760 .count();
2761 let trimmed_len = text.chars().count().saturating_sub(lstrip + rstrip);
2762 if trimmed_len == 0 {
2763 continue;
2764 }
2765 let start: u32 = cell.text_range().start().into();
2766 let s = (start - row_start) as usize;
2767 let visible_start = s + lstrip;
2768 let visible_end = visible_start + trimmed_len - 1;
2769 cell_ranges.push((visible_start, visible_end));
2770 }
2771 cols.iter()
2772 .map(|(col_start, col_end)| {
2773 let cell = cell_ranges
2774 .iter()
2775 .find(|(cs, ce)| ce >= col_start && cs <= col_end);
2776 match cell {
2777 Some((cs, ce)) => {
2778 let left_flush = cs == col_start;
2779 let right_flush = ce == col_end;
2780 match (left_flush, right_flush) {
2781 (true, true) => "AlignDefault",
2782 (true, false) => "AlignLeft",
2783 (false, true) => "AlignRight",
2784 (false, false) => "AlignCenter",
2785 }
2786 }
2787 None => "AlignDefault",
2788 }
2789 })
2790 .collect()
2791}
2792
2793#[allow(clippy::needless_range_loop)]
2820fn grid_table(node: &SyntaxNode) -> Option<TableData> {
2821 let mut tagged: Vec<(SyntaxKind, String)> = Vec::new();
2823 for child in node.children() {
2824 if child.kind() == SyntaxKind::TABLE_CAPTION {
2825 continue;
2826 }
2827 let text = child.text().to_string();
2828 for line in text.split_inclusive('\n') {
2829 let trimmed = line.trim_end_matches('\n');
2830 tagged.push((child.kind(), trimmed.to_string()));
2831 }
2832 }
2833 if tagged.is_empty() {
2834 return None;
2835 }
2836
2837 let max_width = tagged
2839 .iter()
2840 .map(|(_, l)| l.chars().count())
2841 .max()
2842 .unwrap_or(0);
2843 let grid: Vec<Vec<char>> = tagged
2844 .iter()
2845 .map(|(_, l)| {
2846 let mut chars: Vec<char> = l.chars().collect();
2847 chars.resize(max_width, ' ');
2848 chars
2849 })
2850 .collect();
2851 let nlines = grid.len();
2852
2853 let is_sep_line: Vec<bool> = grid
2857 .iter()
2858 .map(|row| {
2859 row.contains(&'+')
2860 && row
2861 .iter()
2862 .all(|&c| matches!(c, '+' | '-' | '=' | ':' | '|' | ' '))
2863 })
2864 .collect();
2865
2866 let mut col_set: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
2868 for (i, row) in grid.iter().enumerate() {
2869 if !is_sep_line[i] {
2870 continue;
2871 }
2872 for (j, &c) in row.iter().enumerate() {
2873 if c == '+' {
2874 col_set.insert(j);
2875 }
2876 }
2877 }
2878 let cols_pos: Vec<usize> = col_set.into_iter().collect();
2879 if cols_pos.len() < 2 {
2880 return None;
2881 }
2882 let ncols = cols_pos.len() - 1;
2883
2884 let row_seps: Vec<usize> = (0..nlines).filter(|&i| is_sep_line[i]).collect();
2886 if row_seps.len() < 2 {
2887 return None;
2888 }
2889 let nrows = row_seps.len() - 1;
2890
2891 let mut block_kind: Vec<&'static str> = vec!["body"; nrows];
2894 for r in 0..nrows {
2895 let start = row_seps[r];
2896 let end = row_seps[r + 1];
2897 for i in (start + 1)..end {
2898 match tagged[i].0 {
2899 SyntaxKind::TABLE_HEADER => block_kind[r] = "head",
2900 SyntaxKind::TABLE_FOOTER => block_kind[r] = "foot",
2901 _ => {}
2902 }
2903 }
2904 }
2905
2906 let mut occupied = vec![vec![false; ncols]; nrows];
2908 let mut cells: Vec<(usize, usize, u32, u32, String)> = Vec::new();
2910 for sr in 0..nrows {
2911 for sc in 0..ncols {
2912 if occupied[sr][sc] {
2913 continue;
2914 }
2915 let i = row_seps[sr];
2916 let j = cols_pos[sc];
2917 if grid[i][j] != '+' {
2918 continue;
2923 }
2924 let Some((er, ec, content)) = find_grid_cell(&grid, i, j, sr, sc, &cols_pos, &row_seps)
2925 else {
2926 continue;
2927 };
2928 let row_span = (er - sr) as u32;
2929 let col_span = (ec - sc) as u32;
2930 for r in sr..er {
2931 for c in sc..ec {
2932 occupied[r][c] = true;
2933 }
2934 }
2935 cells.push((sr, sc, row_span, col_span, content));
2936 }
2937 }
2938
2939 let mut head_rows: Vec<Vec<GridCell>> = Vec::new();
2942 let mut body_rows: Vec<Vec<GridCell>> = Vec::new();
2943 let mut foot_rows: Vec<Vec<GridCell>> = Vec::new();
2944 for r in 0..nrows {
2945 let mut row_cells: Vec<&(usize, usize, u32, u32, String)> =
2946 cells.iter().filter(|(sr, _, _, _, _)| *sr == r).collect();
2947 row_cells.sort_by_key(|(_, sc, _, _, _)| *sc);
2948 let row: Vec<GridCell> = row_cells
2949 .into_iter()
2950 .map(|(_, _, rs, cs, text)| {
2951 let blocks = parse_grid_cell_text(text);
2952 GridCell {
2953 row_span: *rs,
2954 col_span: *cs,
2955 blocks,
2956 }
2957 })
2958 .collect();
2959 match block_kind[r] {
2960 "head" => head_rows.push(row),
2961 "foot" => foot_rows.push(row),
2962 _ => body_rows.push(row),
2963 }
2964 }
2965
2966 let alignment_sep = node
2969 .children()
2970 .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
2971 .find(|c| c.text().to_string().contains(':'))
2972 .or_else(|| {
2973 node.children()
2974 .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
2975 })?;
2976 let widths = grid_dash_widths(&alignment_sep);
2977 let aligns_raw = alignment_sep.text().to_string();
2978 let aligns = if aligns_raw.contains(':') {
2979 grid_separator_aligns(&aligns_raw, ncols)
2980 } else {
2981 vec!["AlignDefault"; ncols]
2982 };
2983
2984 let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
2986 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
2987
2988 Some(TableData {
2989 attr,
2990 caption: caption_inlines,
2991 aligns,
2992 widths: widths.into_iter().map(Some).collect(),
2993 head_rows,
2994 body_rows,
2995 foot_rows,
2996 })
2997}
2998
/// Finds the smallest rectangular cell whose top-left corner is the `+` at
/// grid line `i`, column `j`.
///
/// `sr` / `sc` are the indices of that corner within `row_seps` / `cols_pos`.
/// Candidate right edges are tried left to right and, for each, candidate
/// bottom edges top to bottom. A candidate is accepted when its left and
/// right edges consist of `|` / `+`, its bottom edge of `- = : +`, both bottom
/// corners are `+`, and no interior line splits the rectangle in two.
///
/// Returns `(end_row, end_col, content)` where the end indices are exclusive
/// boundary indices and `content` is the cell text with one leading space and
/// trailing whitespace removed per line and blank lines trimmed from both
/// ends. Returns `None` when no enclosing rectangle exists.
#[allow(clippy::needless_range_loop)]
fn find_grid_cell(
    grid: &[Vec<char>],
    i: usize,
    j: usize,
    sr: usize,
    sc: usize,
    cols_pos: &[usize],
    row_seps: &[usize],
) -> Option<(usize, usize, String)> {
    let last_row = row_seps.len() - 1;
    let last_col = cols_pos.len() - 1;

    let is_horizontal = |c: char| matches!(c, '-' | '=' | ':' | '+');
    let is_vertical = |c: char| matches!(c, '|' | '+');
    // Characters strictly between columns `j` and `right` on `line` are all border characters.
    let horizontal_run =
        |line: usize, right: usize| (j + 1..right).all(|c| is_horizontal(grid[line][c]));
    // Characters strictly between lines `i` and `bottom` in `col` are all border characters.
    let vertical_run = |col: usize, bottom: usize| (i + 1..bottom).all(|r| is_vertical(grid[r][col]));

    for ec in (sc + 1)..=last_col {
        let k = cols_pos[ec];
        // Once the top border is interrupted, no wider cell can exist either.
        if !horizontal_run(i, k) {
            break;
        }
        for er in (sr + 1)..=last_row {
            let l = row_seps[er];
            // A broken left edge rules out every taller candidate as well.
            if !vertical_run(j, l) {
                break;
            }
            let enclosed = vertical_run(k, l)
                && horizontal_run(l, k)
                && grid[l][j] == '+'
                && grid[l][k] == '+';
            if !enclosed {
                continue;
            }
            // A full-width border strictly inside means this rectangle is
            // really several stacked cells.
            let split_inside = (i + 1..l)
                .any(|m| grid[m][j] == '+' && grid[m][k] == '+' && horizontal_run(m, k));
            if split_inside {
                continue;
            }

            let lines: Vec<String> = (i + 1..l)
                .map(|r| {
                    let raw: String = grid[r][j + 1..k].iter().collect();
                    raw.strip_prefix(' ')
                        .unwrap_or(&raw)
                        .trim_end()
                        .to_string()
                })
                .collect();
            let first_filled = lines.iter().position(|s| !s.is_empty());
            let last_filled = lines.iter().rposition(|s| !s.is_empty());
            let content = match (first_filled, last_filled) {
                (Some(first), Some(last)) => lines[first..=last].join("\n"),
                _ => String::new(),
            };
            return Some((er, ec, content));
        }
    }
    None
}
3084
3085fn parse_grid_cell_text(text: &str) -> Vec<Block> {
3089 if text.trim().is_empty() {
3090 return Vec::new();
3091 }
3092 let opts = crate::ParserOptions {
3093 flavor: crate::Flavor::Pandoc,
3094 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
3095 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
3096 ..crate::ParserOptions::default()
3097 };
3098 let doc = crate::parse(text, Some(opts));
3099 let mut out = Vec::new();
3100 for child in doc.children() {
3101 if let Some(block) = block_from(&child) {
3102 let block = match block {
3103 Block::Para(inlines) => Block::Plain(inlines),
3104 other => other,
3105 };
3106 out.push(block);
3107 }
3108 }
3109 out
3110}
3111
3112fn grid_dash_widths(separator: &SyntaxNode) -> Vec<f64> {
3123 let raw_text = separator.text().to_string();
3124 let line = raw_text.trim_end_matches(['\n', '\r']);
3125 let mut raw: Vec<usize> = Vec::new();
3126 let mut count: usize = 0;
3127 let mut in_col = false;
3128 for ch in line.chars() {
3129 match ch {
3130 '+' => {
3131 if in_col {
3132 raw.push(count + 1);
3133 count = 0;
3134 }
3135 in_col = true;
3136 }
3137 _ => {
3138 if in_col {
3139 count += 1;
3140 }
3141 }
3142 }
3143 }
3144 if raw.is_empty() {
3145 return Vec::new();
3146 }
3147 let total: usize = raw.iter().sum();
3148 let count = raw.len();
3149 let norm = (total + count).saturating_sub(2).max(72) as f64;
3150 raw.into_iter().map(|w| w as f64 / norm).collect()
3151}
3152
3153fn grid_separator_aligns(raw: &str, cols: usize) -> Vec<&'static str> {
3154 let line = raw.trim_end_matches(['\n', '\r']);
3155 let mut aligns: Vec<&'static str> = Vec::with_capacity(cols);
3156 let mut col_start: Option<usize> = None;
3157 for (i, ch) in line.char_indices() {
3158 if ch == '+' {
3159 if let Some(s) = col_start.take() {
3160 let seg = &line[s..i];
3161 aligns.push(grid_segment_align(seg));
3162 }
3163 col_start = Some(i + 1);
3164 }
3165 }
3166 while aligns.len() < cols {
3167 aligns.push("AlignDefault");
3168 }
3169 aligns.truncate(cols);
3170 aligns
3171}
3172
/// Classifies one separator segment by the position of its colons:
/// a colon on both ends is centered, only at the start is left, only at the
/// end is right, and anything else is the default alignment.
fn grid_segment_align(seg: &str) -> &'static str {
    let colon_at_end = seg.ends_with(':');
    if seg.starts_with(':') {
        if colon_at_end {
            "AlignCenter"
        } else {
            "AlignLeft"
        }
    } else if colon_at_end {
        "AlignRight"
    } else {
        "AlignDefault"
    }
}
3184
3185fn multiline_table(node: &SyntaxNode) -> Option<TableData> {
3195 let separators: Vec<SyntaxNode> = node
3198 .children()
3199 .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
3200 .collect();
3201 let header = node
3202 .children()
3203 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
3204 let column_sep = if header.is_some() {
3205 separators.get(1).cloned()
3206 } else {
3207 separators.first().cloned()
3208 }?;
3209 let cols = simple_table_dash_runs(&column_sep);
3210 if cols.is_empty() {
3211 return None;
3212 }
3213 let raw: Vec<usize> = cols
3218 .iter()
3219 .enumerate()
3220 .map(|(i, (s, e))| {
3221 if i + 1 < cols.len() {
3222 cols[i + 1].0 - s
3223 } else {
3224 e - s + 2
3225 }
3226 })
3227 .collect();
3228 let total: usize = raw.iter().sum();
3229 let norm = (total.max(72)) as f64;
3230 let widths: Vec<f64> = raw.into_iter().map(|w| w as f64 / norm).collect();
3231 let aligns = if let Some(h) = &header {
3234 simple_table_aligns(h, &cols)
3235 } else if let Some(r0) = node.children().find(|c| c.kind() == SyntaxKind::TABLE_ROW) {
3236 simple_table_aligns(&r0, &cols)
3237 } else {
3238 vec!["AlignDefault"; cols.len()]
3239 };
3240 let head_rows = match &header {
3241 Some(h) => vec![
3242 multiline_row_cells_blocks(h, &cols)
3243 .into_iter()
3244 .map(GridCell::no_span)
3245 .collect(),
3246 ],
3247 None => Vec::new(),
3248 };
3249 let body_rows: Vec<Vec<GridCell>> = node
3250 .children()
3251 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
3252 .map(|r| {
3253 multiline_row_cells_blocks(&r, &cols)
3254 .into_iter()
3255 .map(GridCell::no_span)
3256 .collect()
3257 })
3258 .collect();
3259 let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
3260 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
3261 Some(TableData {
3262 attr,
3263 caption: caption_inlines,
3264 aligns,
3265 widths: widths.into_iter().map(Some).collect(),
3266 head_rows,
3267 body_rows,
3268 foot_rows: Vec::new(),
3269 })
3270}
3271
3272fn multiline_row_cells_blocks(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<Vec<Block>> {
3276 let row_start: u32 = row.text_range().start().into();
3277 let raw = row.text().to_string();
3278 let lines: Vec<&str> = raw.split_inclusive('\n').collect();
3282 let mut col_lines: Vec<Vec<String>> = vec![Vec::new(); cols.len()];
3283 let mut line_start_offset: usize = 0;
3284 for line in lines {
3285 let line_no_nl = line.trim_end_matches('\n');
3286 if line_no_nl.trim().is_empty() {
3287 line_start_offset += line.len();
3288 continue;
3289 }
3290 for (i, &(cs, ce)) in cols.iter().enumerate() {
3291 let slice = char_slice(line_no_nl, cs, ce + 1);
3293 let trimmed = slice.trim();
3294 if !trimmed.is_empty() {
3295 col_lines[i].push(trimmed.to_string());
3296 }
3297 }
3298 line_start_offset += line.len();
3299 }
3300 let _ = (row_start, line_start_offset);
3301 cols.iter()
3302 .enumerate()
3303 .map(|(i, _)| {
3304 let segments = &col_lines[i];
3305 if segments.is_empty() {
3306 return Vec::new();
3307 }
3308 let joined = segments.join("\n");
3314 let inlines = parse_cell_text_inlines(&joined);
3315 if inlines.is_empty() {
3316 return Vec::new();
3317 }
3318 vec![Block::Plain(coalesce_inlines(inlines))]
3319 })
3320 .collect()
3321}
3322
3323fn parse_cell_text_inlines(text: &str) -> Vec<Inline> {
3329 if text.trim().is_empty() {
3330 return Vec::new();
3331 }
3332 let opts = crate::ParserOptions {
3333 flavor: crate::Flavor::Pandoc,
3334 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
3335 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
3336 ..crate::ParserOptions::default()
3337 };
3338 let doc = crate::parse(text, Some(opts));
3339 for node in doc.descendants() {
3340 if matches!(node.kind(), SyntaxKind::PARAGRAPH | SyntaxKind::PLAIN) {
3341 return inlines_from(&node);
3342 }
3343 }
3344 Vec::new()
3345}
3346
/// Returns the substring of `s` covering characters `start_char..end_char`
/// (character indices, end exclusive).
///
/// Indices past the end of `s` are clamped to the end, and an inverted range
/// yields the empty string, so this never panics.
fn char_slice(s: &str, start_char: usize, end_char: usize) -> &str {
    // Byte offset of the n-th character, or the end of the string when there is none.
    let byte_offset = |char_idx: usize| {
        s.char_indices()
            .nth(char_idx)
            .map_or(s.len(), |(offset, _)| offset)
    };
    let start_byte = byte_offset(start_char);
    let end_byte = byte_offset(end_char);
    if start_byte > end_byte {
        ""
    } else {
        &s[start_byte..end_byte]
    }
}
3364
3365fn list_block(node: &SyntaxNode) -> Block {
3366 let loose = is_loose_list(node);
3367 let items: Vec<Vec<Block>> = node
3368 .children()
3369 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
3370 .map(|item| list_item_blocks(&item, loose))
3371 .collect();
3372 if list_is_ordered(node) {
3373 let (start, style, delim) = ordered_list_attrs(node);
3374 Block::OrderedList(start, style, delim, items)
3375 } else {
3376 Block::BulletList(items)
3377 }
3378}
3379
3380fn list_is_ordered(node: &SyntaxNode) -> bool {
3381 let Some(item) = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
3382 return false;
3383 };
3384 let marker = item
3385 .children_with_tokens()
3386 .filter_map(|el| el.into_token())
3387 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
3388 .map(|t| t.text().to_string())
3389 .unwrap_or_default();
3390 let trimmed = marker.trim();
3391 !trimmed.starts_with(['-', '+', '*'])
3392}
3393
3394fn ordered_list_attrs(node: &SyntaxNode) -> (usize, &'static str, &'static str) {
3395 let item = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM);
3396 let marker = item
3397 .as_ref()
3398 .and_then(|i| {
3399 i.children_with_tokens()
3400 .filter_map(|el| el.into_token())
3401 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
3402 .map(|t| t.text().to_string())
3403 })
3404 .unwrap_or_default();
3405 let (mut start, style, delim) = classify_ordered_marker(marker.trim());
3406 if style == "Example" {
3407 let offset: u32 = node.text_range().start().into();
3408 if let Some(s) = REFS_CTX.with(|c| {
3409 c.borrow()
3410 .example_list_start_by_offset
3411 .get(&offset)
3412 .copied()
3413 }) {
3414 start = s;
3415 }
3416 }
3417 (start, style, delim)
3418}
3419
3420fn classify_ordered_marker(trimmed: &str) -> (usize, &'static str, &'static str) {
3428 let (body, delim) =
3430 if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
3431 (inner, "TwoParens")
3432 } else if let Some(inner) = trimmed.strip_suffix(')') {
3433 (inner, "OneParen")
3434 } else if let Some(inner) = trimmed.strip_suffix('.') {
3435 (inner, "Period")
3436 } else {
3437 (trimmed, "DefaultDelim")
3438 };
3439
3440 if !body.is_empty() && body.chars().all(|c| c.is_ascii_digit()) {
3442 let start: usize = body.parse().unwrap_or(1);
3443 return (start, "Decimal", delim);
3444 }
3445
3446 if body == "#" {
3449 return (1, "DefaultStyle", "DefaultDelim");
3450 }
3451
3452 if let Some(rest) = body.strip_prefix('@')
3454 && rest
3455 .chars()
3456 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
3457 {
3458 return (1, "Example", delim);
3459 }
3460
3461 if body == "i" {
3463 return (1, "LowerRoman", delim);
3464 }
3465 if body == "I" {
3466 return (1, "UpperRoman", delim);
3467 }
3468
3469 if body.len() == 1
3471 && let Some(c) = body.chars().next()
3472 {
3473 if c.is_ascii_lowercase() {
3474 return ((c as u8 - b'a') as usize + 1, "LowerAlpha", delim);
3475 }
3476 if c.is_ascii_uppercase() {
3477 return ((c as u8 - b'A') as usize + 1, "UpperAlpha", delim);
3478 }
3479 }
3480
3481 if body
3483 .chars()
3484 .all(|c| matches!(c, 'i' | 'v' | 'x' | 'l' | 'c' | 'd' | 'm'))
3485 && let Some(n) = roman_to_int(body, false)
3486 {
3487 return (n, "LowerRoman", delim);
3488 }
3489 if body
3490 .chars()
3491 .all(|c| matches!(c, 'I' | 'V' | 'X' | 'L' | 'C' | 'D' | 'M'))
3492 && let Some(n) = roman_to_int(body, true)
3493 {
3494 return (n, "UpperRoman", delim);
3495 }
3496
3497 (1, "Decimal", delim)
3500}
3501
/// Converts a roman numeral to its integer value.
///
/// With `upper == true` only the uppercase letters `IVXLCDM` are accepted;
/// otherwise each character is ASCII-uppercased first, so either case works.
/// A smaller digit directly before a larger one is subtracted from it as a
/// pair; no further validation of the numeral's form is performed.
///
/// Returns `None` for an empty string or when any character is not a roman
/// digit.
fn roman_to_int(s: &str, upper: bool) -> Option<usize> {
    fn digit_value(c: char) -> Option<usize> {
        match c {
            'I' => Some(1),
            'V' => Some(5),
            'X' => Some(10),
            'L' => Some(50),
            'C' => Some(100),
            'D' => Some(500),
            'M' => Some(1000),
            _ => None,
        }
    }

    let digits: Vec<usize> = s
        .chars()
        .map(|c| digit_value(if upper { c } else { c.to_ascii_uppercase() }))
        .collect::<Option<Vec<usize>>>()?;
    if digits.is_empty() {
        return None;
    }

    let mut total = 0usize;
    let mut rest = digits.as_slice();
    while let [current, tail @ ..] = rest {
        match tail {
            // Subtractive pair: consume both digits at once.
            [next, after @ ..] if current < next => {
                total += next - current;
                rest = after;
            }
            _ => {
                total += current;
                rest = tail;
            }
        }
    }
    Some(total)
}
3539
3540fn list_item_blocks(item: &SyntaxNode, loose: bool) -> Vec<Block> {
3541 let mut out = Vec::new();
3542 let item_indent = list_item_content_offset(item);
3543 let task_checkbox = task_checkbox_for_item(item);
3544 let mut checkbox_emitted = false;
3545 for child in item.children() {
3546 match child.kind() {
3547 SyntaxKind::PLAIN => {
3548 let mut inlines = coalesce_inlines(inlines_from(&child));
3549 if inlines.is_empty() {
3554 continue;
3555 }
3556 if !checkbox_emitted && let Some(glyph) = task_checkbox {
3557 inlines.insert(0, Inline::Space);
3558 inlines.insert(0, Inline::Str(glyph.to_string()));
3559 checkbox_emitted = true;
3560 }
3561 if loose {
3562 out.push(Block::Para(inlines));
3563 } else {
3564 out.push(Block::Plain(inlines));
3565 }
3566 }
3567 SyntaxKind::CODE_BLOCK => {
3568 out.push(indented_code_block_with_extra_strip(&child, item_indent));
3576 }
3577 _ => collect_block(&child, &mut out),
3578 }
3579 }
3580 out
3581}
3582
3583fn task_checkbox_for_item(item: &SyntaxNode) -> Option<&'static str> {
3588 item.children_with_tokens()
3589 .filter_map(|el| el.into_token())
3590 .find(|t| t.kind() == SyntaxKind::TASK_CHECKBOX)
3591 .map(|t| {
3592 let text = t.text();
3593 if text.contains('x') || text.contains('X') {
3594 "\u{2612}"
3595 } else {
3596 "\u{2610}"
3597 }
3598 })
3599}
3600
3601fn list_item_content_offset(item: &SyntaxNode) -> usize {
3620 let parent_ws = parent_list_leading_ws(item);
3621 let mut marker_width = 0usize;
3622 let mut leading_ws = 0usize;
3623 let mut saw_marker = false;
3624 for el in item.children_with_tokens() {
3625 if let NodeOrToken::Token(t) = el {
3626 match t.kind() {
3627 SyntaxKind::WHITESPACE if !saw_marker => {
3628 leading_ws += t.text().chars().count();
3629 }
3630 SyntaxKind::LIST_MARKER => {
3631 marker_width += t.text().chars().count();
3632 saw_marker = true;
3633 }
3634 SyntaxKind::WHITESPACE if saw_marker => {
3635 return parent_ws + leading_ws + marker_width + t.text().chars().count();
3636 }
3637 _ if saw_marker => {
3638 return parent_ws + leading_ws + marker_width;
3639 }
3640 _ => {}
3641 }
3642 } else if saw_marker {
3643 return parent_ws + leading_ws + marker_width;
3644 }
3645 }
3646 parent_ws + leading_ws + marker_width
3647}
3648
3649fn parent_list_leading_ws(item: &SyntaxNode) -> usize {
3654 let prev = item.prev_sibling_or_token();
3655 match prev {
3656 Some(NodeOrToken::Token(t)) if t.kind() == SyntaxKind::WHITESPACE => {
3657 t.text().chars().count()
3658 }
3659 _ => 0,
3660 }
3661}
3662
3663fn is_loose_list(node: &SyntaxNode) -> bool {
3664 let mut prev_was_item = false;
3665 for child in node.children_with_tokens() {
3666 if let NodeOrToken::Node(n) = child {
3667 if n.kind() == SyntaxKind::LIST_ITEM {
3668 prev_was_item = true;
3669 } else if n.kind() == SyntaxKind::BLANK_LINE
3670 && prev_was_item
3671 && n.next_sibling()
3672 .map(|s| s.kind() == SyntaxKind::LIST_ITEM)
3673 .unwrap_or(false)
3674 {
3675 return true;
3676 }
3677 }
3678 }
3679 for item in node
3680 .children()
3681 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
3682 {
3683 if item.children().any(|c| c.kind() == SyntaxKind::PARAGRAPH) {
3684 return true;
3685 }
3686 if has_internal_blank_between_blocks(&item) {
3691 return true;
3692 }
3693 }
3694 false
3695}
3696
3697fn has_internal_blank_between_blocks(item: &SyntaxNode) -> bool {
3698 let mut saw_block_before = false;
3699 let mut pending_blank = false;
3700 for child in item.children() {
3701 match child.kind() {
3702 SyntaxKind::BLANK_LINE => {
3703 if saw_block_before {
3704 pending_blank = true;
3705 }
3706 }
3707 SyntaxKind::PLAIN if child_is_empty_plain(&child) => {}
3711 _ => {
3712 if pending_blank {
3713 return true;
3714 }
3715 saw_block_before = true;
3716 }
3717 }
3718 }
3719 false
3720}
3721
3722fn child_is_empty_plain(node: &SyntaxNode) -> bool {
3723 !node.children_with_tokens().any(|el| match el {
3724 NodeOrToken::Token(t) => !matches!(t.kind(), SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE),
3725 NodeOrToken::Node(_) => true,
3726 })
3727}
3728
3729fn inlines_from(parent: &SyntaxNode) -> Vec<Inline> {
3732 let mut out = Vec::new();
3733 let mut iter = parent.children_with_tokens().peekable();
3734 while let Some(el) = iter.next() {
3735 match el {
3736 NodeOrToken::Token(t) => push_token_inline(&t, &mut out),
3737 NodeOrToken::Node(n) if n.kind() == SyntaxKind::LATEX_COMMAND => {
3738 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
3739 }
3740 NodeOrToken::Node(n) if n.kind() == SyntaxKind::CITATION => {
3741 emit_citation_with_absorb(&n, &mut iter, &mut out);
3742 }
3743 NodeOrToken::Node(n) => push_inline_node(&n, &mut out),
3744 }
3745 }
3746 while matches!(out.last(), Some(Inline::SoftBreak)) {
3750 out.pop();
3751 }
3752 out
3753}
3754
3755fn emit_citation_with_absorb<I>(
3763 node: &SyntaxNode,
3764 iter: &mut std::iter::Peekable<I>,
3765 out: &mut Vec<Inline>,
3766) where
3767 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
3768{
3769 let bracketed = node
3770 .children_with_tokens()
3771 .filter_map(|el| el.into_token())
3772 .any(|t| t.kind() == SyntaxKind::LINK_START);
3773 if bracketed {
3774 render_citation_inline(node, out, None);
3775 return;
3776 }
3777 let next_sibling_pair = node.next_sibling_or_token().and_then(|el1| {
3781 let t = el1.as_token().cloned()?;
3782 if t.kind() != SyntaxKind::TEXT || !t.text().starts_with(' ') {
3783 return None;
3784 }
3785 let space_text = t.text().to_string();
3786 let link_el = t.next_sibling_or_token()?;
3787 let link = link_el.as_node().cloned()?;
3788 if link.kind() != SyntaxKind::LINK && link.kind() != SyntaxKind::UNRESOLVED_REFERENCE {
3793 return None;
3794 }
3795 let has_dest = link
3796 .children_with_tokens()
3797 .filter_map(|el| el.into_token())
3798 .any(|tok| tok.kind() == SyntaxKind::LINK_DEST_START);
3799 if has_dest {
3800 return None;
3801 }
3802 let link_text = link
3803 .children()
3804 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3805 .map(|tt| tt.text().to_string())
3806 .unwrap_or_default();
3807 Some((space_text, link_text))
3808 });
3809 if let Some((_space_text, locator_text)) = next_sibling_pair {
3810 iter.next();
3812 iter.next();
3813 render_citation_inline(node, out, Some(&locator_text));
3814 } else {
3815 render_citation_inline(node, out, None);
3816 }
3817}
3818
3819fn emit_latex_command_with_absorb<I>(
3826 node: &SyntaxNode,
3827 iter: &mut std::iter::Peekable<I>,
3828 out: &mut Vec<Inline>,
3829) where
3830 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
3831{
3832 let mut content = node.text().to_string();
3833 let ends_in_letter = content
3834 .chars()
3835 .next_back()
3836 .is_some_and(|c| c.is_ascii_alphabetic());
3837 if ends_in_letter
3838 && let Some(NodeOrToken::Token(t)) = iter.peek()
3839 && t.kind() == SyntaxKind::TEXT
3840 {
3841 let text = t.text().to_string();
3842 let bytes = text.as_bytes();
3843 let mut absorbed = 0;
3844 while absorbed < bytes.len() && (bytes[absorbed] == b' ' || bytes[absorbed] == b'\t') {
3845 absorbed += 1;
3846 }
3847 if absorbed > 0 {
3848 content.push_str(&text[..absorbed]);
3849 out.push(Inline::RawInline("tex".to_string(), content));
3850 iter.next();
3851 let remainder = &text[absorbed..];
3852 if !remainder.is_empty() {
3853 push_text(remainder, out);
3854 }
3855 return;
3856 }
3857 }
3858 out.push(Inline::RawInline("tex".to_string(), content));
3859}
3860
3861fn push_inline_node(node: &SyntaxNode, out: &mut Vec<Inline>) {
3862 match node.kind() {
3863 SyntaxKind::LINK => render_link_inline(node, out),
3864 SyntaxKind::IMAGE_LINK => render_image_inline(node, out),
3865 SyntaxKind::CITATION => render_citation_inline(node, out, None),
3866 SyntaxKind::UNRESOLVED_REFERENCE => render_unresolved_reference_inline(node, out),
3874 _ => out.push(inline_from_node(node)),
3875 }
3876}
3877
3878fn render_unresolved_reference_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
3891 let is_image = node
3892 .children()
3893 .any(|c| c.kind() == SyntaxKind::IMAGE_LINK_START);
3894 let text_node = if is_image {
3895 node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT)
3896 } else {
3897 node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3898 };
3899 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
3900
3901 let text_label = text_node
3902 .as_ref()
3903 .map(|n| n.text().to_string())
3904 .unwrap_or_default();
3905 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
3906 Some(rn) => {
3907 let inner = rn.text().to_string();
3908 if inner.is_empty() {
3909 (text_label.clone(), true, String::new())
3910 } else {
3911 (inner.clone(), true, inner)
3912 }
3913 }
3914 None => (text_label.clone(), false, String::new()),
3915 };
3916
3917 if !is_image && let Some(id) = lookup_heading_id(&label) {
3920 let url = format!("#{id}");
3921 let resolved_text_inlines = text_node
3922 .as_ref()
3923 .map(|n| coalesce_inlines(inlines_from(n)))
3924 .unwrap_or_default();
3925 out.push(Inline::Link(
3926 extract_attr_from_node(node),
3927 resolved_text_inlines,
3928 url,
3929 String::new(),
3930 ));
3931 return;
3932 }
3933
3934 if let Some((url, title)) = lookup_ref(&label) {
3940 let resolved_text_inlines = text_node
3941 .as_ref()
3942 .map(|n| coalesce_inlines(inlines_from(n)))
3943 .unwrap_or_default();
3944 let kind = if is_image {
3945 Inline::Image
3946 } else {
3947 Inline::Link
3948 };
3949 out.push(kind(
3950 extract_attr_from_node(node),
3951 resolved_text_inlines,
3952 url,
3953 title,
3954 ));
3955 return;
3956 }
3957
3958 let unresolved_text_inlines = text_node
3961 .as_ref()
3962 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
3963 .unwrap_or_default();
3964 let opener = if is_image { "![" } else { "[" };
3965 out.push(Inline::Str(opener.to_string()));
3966 out.extend(unresolved_text_inlines);
3967 let suffix = if has_second_brackets {
3968 format!("][{second_inner}]")
3969 } else {
3970 "]".to_string()
3971 };
3972 out.push(Inline::Str(suffix));
3973}
3974
3975fn render_citation_inline(
3984 node: &SyntaxNode,
3985 out: &mut Vec<Inline>,
3986 extra_suffix_text: Option<&str>,
3987) {
3988 let first_key = node
3990 .children_with_tokens()
3991 .filter_map(|el| el.into_token())
3992 .find(|t| t.kind() == SyntaxKind::CITATION_KEY)
3993 .map(|t| t.text().to_string())
3994 .unwrap_or_default();
3995 let example_resolution =
3996 REFS_CTX.with(|c| c.borrow().example_label_to_num.get(&first_key).copied());
3997 if let Some(n) = example_resolution {
3998 out.push(Inline::Str(n.to_string()));
3999 return;
4000 }
4001
4002 let bracketed = node
4003 .children_with_tokens()
4004 .filter_map(|el| el.into_token())
4005 .any(|t| t.kind() == SyntaxKind::LINK_START);
4006
4007 let mut builders: Vec<CitationBuilder> = Vec::new();
4008 let mut current: Option<CitationBuilder> = None;
4009 let mut pending_prefix = String::new();
4010 for el in node.children_with_tokens() {
4011 let token = match el {
4012 NodeOrToken::Token(t) => t,
4013 _ => continue,
4014 };
4015 match token.kind() {
4016 SyntaxKind::LINK_START | SyntaxKind::LINK_DEST => {}
4017 SyntaxKind::CITATION_BRACE_OPEN | SyntaxKind::CITATION_BRACE_CLOSE => {}
4018 SyntaxKind::CITATION_MARKER => {
4019 if let Some(c) = current.take() {
4020 builders.push(c);
4021 }
4022 let mode = if token.text() == "-@" {
4023 CitationMode::SuppressAuthor
4024 } else if bracketed {
4025 CitationMode::NormalCitation
4026 } else {
4027 CitationMode::AuthorInText
4028 };
4029 current = Some(CitationBuilder::new(
4030 std::mem::take(&mut pending_prefix),
4031 mode,
4032 ));
4033 }
4034 SyntaxKind::CITATION_KEY => {
4035 if let Some(c) = &mut current {
4036 c.id.push_str(token.text());
4037 }
4038 }
4039 SyntaxKind::CITATION_CONTENT => {
4040 if let Some(c) = &mut current {
4041 c.suffix_raw.push_str(token.text());
4042 } else {
4043 pending_prefix.push_str(token.text());
4044 }
4045 }
4046 SyntaxKind::CITATION_SEPARATOR => {
4047 if let Some(c) = current.take() {
4048 builders.push(c);
4049 }
4050 }
4051 _ => {}
4052 }
4053 }
4054 if let Some(c) = current.take() {
4055 builders.push(c);
4056 }
4057
4058 if let Some(extra) = extra_suffix_text
4062 && let Some(last) = builders.last_mut()
4063 {
4064 if !last.suffix_raw.is_empty() && !extra.starts_with(' ') {
4065 last.suffix_raw.push(' ');
4066 }
4067 last.suffix_raw.push_str(extra);
4068 }
4069
4070 let note_offset: u32 = node.text_range().start().into();
4071 let note_num = REFS_CTX
4072 .with(|c| {
4073 c.borrow()
4074 .cite_note_num_by_offset
4075 .get(¬e_offset)
4076 .copied()
4077 })
4078 .unwrap_or(1);
4079
4080 let projected: Vec<Citation> = builders
4081 .into_iter()
4082 .map(|b| b.into_citation(note_num))
4083 .collect();
4084
4085 let mut literal = node.text().to_string();
4087 if let Some(extra) = extra_suffix_text {
4088 literal.push(' ');
4089 literal.push('[');
4090 literal.push_str(extra);
4091 literal.push(']');
4092 }
4093 let text_inlines = literal_inlines(&literal);
4094
4095 out.push(Inline::Cite(projected, text_inlines));
4096}
4097
4098struct CitationBuilder {
4104 id: String,
4105 prefix_raw: String,
4106 suffix_raw: String,
4107 mode: CitationMode,
4108}
4109
4110impl CitationBuilder {
4111 fn new(prefix_raw: String, mode: CitationMode) -> Self {
4112 Self {
4113 id: String::new(),
4114 prefix_raw,
4115 suffix_raw: String::new(),
4116 mode,
4117 }
4118 }
4119
4120 fn into_citation(self, note_num: i64) -> Citation {
4121 let prefix = parse_cite_affix_inlines(self.prefix_raw.trim_end(), true);
4122 let suffix = parse_cite_affix_inlines(&self.suffix_raw, false);
4123 Citation {
4124 id: self.id,
4125 prefix,
4126 suffix,
4127 mode: self.mode,
4128 note_num,
4129 hash: 0,
4130 }
4131 }
4132}
4133
4134fn parse_cite_affix_inlines(raw: &str, is_prefix: bool) -> Vec<Inline> {
4146 if raw.is_empty() {
4147 return Vec::new();
4148 }
4149 let trimmed = if is_prefix { raw.trim_start() } else { raw };
4150 if trimmed.is_empty() {
4151 return Vec::new();
4152 }
4153 let leading_space = !is_prefix && trimmed.starts_with([' ', '\t']);
4154 let work = trimmed.trim_start_matches([' ', '\t']);
4155 if work.is_empty() {
4156 return if leading_space {
4157 vec![Inline::Space]
4158 } else {
4159 Vec::new()
4160 };
4161 }
4162 let wrapped = format!("Z {work}");
4163 let inlines = parse_cell_text_inlines(&wrapped);
4164 let mut coalesced = coalesce_inlines(inlines);
4165 if matches!(coalesced.first(), Some(Inline::Str(s)) if s == "Z") {
4167 coalesced.remove(0);
4168 if matches!(coalesced.first(), Some(Inline::Space)) {
4169 coalesced.remove(0);
4170 }
4171 }
4172 if leading_space {
4173 coalesced.insert(0, Inline::Space);
4174 }
4175 coalesced
4176}
4177
4178fn literal_inlines(text: &str) -> Vec<Inline> {
4184 let mut out: Vec<Inline> = Vec::new();
4185 let mut buf = String::new();
4186 for ch in text.chars() {
4187 match ch {
4188 ' ' | '\t' => {
4189 if !buf.is_empty() {
4190 out.push(Inline::Str(std::mem::take(&mut buf)));
4191 }
4192 if !matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4193 out.push(Inline::Space);
4194 }
4195 }
4196 '\n' => {
4197 if !buf.is_empty() {
4198 out.push(Inline::Str(std::mem::take(&mut buf)));
4199 }
4200 if matches!(out.last(), Some(Inline::Space)) {
4201 out.pop();
4202 }
4203 out.push(Inline::SoftBreak);
4204 }
4205 _ => buf.push(ch),
4206 }
4207 }
4208 if !buf.is_empty() {
4209 out.push(Inline::Str(buf));
4210 }
4211 out
4212}
4213
4214fn push_token_inline(
4215 t: &rowan::SyntaxToken<crate::syntax::PanacheLanguage>,
4216 out: &mut Vec<Inline>,
4217) {
4218 match t.kind() {
4219 SyntaxKind::TEXT => push_text(t.text(), out),
4220 SyntaxKind::WHITESPACE => out.push(Inline::Space),
4221 SyntaxKind::NEWLINE => out.push(Inline::SoftBreak),
4222 SyntaxKind::HARD_LINE_BREAK => out.push(Inline::LineBreak),
4223 SyntaxKind::ESCAPED_CHAR => {
4224 let s: String = t.text().chars().skip(1).collect();
4226 out.push(Inline::Str(s));
4227 }
4228 SyntaxKind::NONBREAKING_SPACE => out.push(Inline::Str("\u{a0}".to_string())),
4229 _ => {}
4232 }
4233}
4234
4235fn push_text(text: &str, out: &mut Vec<Inline>) {
4236 let mut buf = String::new();
4237 for ch in text.chars() {
4238 if ch == ' ' || ch == '\t' {
4239 if !buf.is_empty() {
4240 out.push(Inline::Str(std::mem::take(&mut buf)));
4241 }
4242 out.push(Inline::Space);
4243 } else if ch == '\n' {
4244 if !buf.is_empty() {
4245 out.push(Inline::Str(std::mem::take(&mut buf)));
4246 }
4247 out.push(Inline::SoftBreak);
4248 } else {
4249 buf.push(ch);
4250 }
4251 }
4252 if !buf.is_empty() {
4253 out.push(Inline::Str(buf));
4254 }
4255}
4256
4257fn inline_from_node(node: &SyntaxNode) -> Inline {
4258 match node.kind() {
4259 SyntaxKind::EMPHASIS => {
4260 Inline::Emph(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4261 }
4262 SyntaxKind::STRONG => {
4263 Inline::Strong(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4264 }
4265 SyntaxKind::STRIKEOUT => {
4266 Inline::Strikeout(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4267 }
4268 SyntaxKind::SUPERSCRIPT => {
4269 Inline::Superscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4270 }
4271 SyntaxKind::SUBSCRIPT => {
4272 Inline::Subscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4273 }
4274 SyntaxKind::INLINE_CODE => {
4275 let content: String = node
4276 .children_with_tokens()
4277 .filter_map(|el| el.into_token())
4278 .filter(|t| t.kind() == SyntaxKind::INLINE_CODE_CONTENT)
4279 .map(|t| t.text().to_string())
4280 .collect();
4281 Inline::Code(
4282 extract_attr_from_node(node),
4283 strip_inline_code_padding(&content),
4284 )
4285 }
4286 SyntaxKind::LINK | SyntaxKind::IMAGE_LINK | SyntaxKind::UNRESOLVED_REFERENCE => {
4287 Inline::Unsupported(format!("{:?}", node.kind()))
4294 }
4295 SyntaxKind::AUTO_LINK => autolink_inline(node),
4296 SyntaxKind::INLINE_MATH => math_inline(node, "InlineMath"),
4297 SyntaxKind::DISPLAY_MATH => math_inline(node, "DisplayMath"),
4298 SyntaxKind::LATEX_COMMAND => latex_command_inline(node),
4299 SyntaxKind::BRACKETED_SPAN => bracketed_span_inline(node),
4300 SyntaxKind::INLINE_HTML_SPAN => inline_html_span_inline(node),
4301 SyntaxKind::INLINE_HTML => Inline::RawInline("html".to_string(), node.text().to_string()),
4302 SyntaxKind::FOOTNOTE_REFERENCE => footnote_reference_inline(node),
4303 SyntaxKind::INLINE_FOOTNOTE => inline_footnote_inline(node),
4304 other => Inline::Unsupported(format!("{other:?}")),
4305 }
4306}
4307
4308fn inlines_from_marked(parent: &SyntaxNode) -> Vec<Inline> {
4312 let mut out = Vec::new();
4313 let mut iter = parent.children_with_tokens().peekable();
4314 while let Some(el) = iter.next() {
4315 match el {
4316 NodeOrToken::Token(t) => match t.kind() {
4317 SyntaxKind::EMPHASIS_MARKER
4318 | SyntaxKind::STRONG_MARKER
4319 | SyntaxKind::STRIKEOUT_MARKER
4320 | SyntaxKind::SUPERSCRIPT_MARKER
4321 | SyntaxKind::SUBSCRIPT_MARKER
4322 | SyntaxKind::MARK_MARKER => {}
4323 _ => push_token_inline(&t, &mut out),
4324 },
4325 NodeOrToken::Node(n) => match n.kind() {
4326 SyntaxKind::EMPHASIS_MARKER
4327 | SyntaxKind::STRONG_MARKER
4328 | SyntaxKind::STRIKEOUT_MARKER
4329 | SyntaxKind::SUPERSCRIPT_MARKER
4330 | SyntaxKind::SUBSCRIPT_MARKER
4331 | SyntaxKind::MARK_MARKER => {}
4332 _ if n.kind() == SyntaxKind::LATEX_COMMAND => {
4333 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
4334 }
4335 _ => push_inline_node(&n, &mut out),
4336 },
4337 }
4338 }
4339 out
4340}
4341
4342fn render_link_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
4343 let text_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT);
4344 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
4345 let has_dest_paren = node
4346 .children_with_tokens()
4347 .any(|el| matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::LINK_DEST_START));
4348
4349 if has_dest_paren {
4350 let text = text_node
4351 .as_ref()
4352 .map(|n| coalesce_inlines(inlines_from(n)))
4353 .unwrap_or_default();
4354 let (url, title) = dest_node
4355 .as_ref()
4356 .map(parse_link_dest)
4357 .unwrap_or((String::new(), String::new()));
4358 out.push(Inline::Link(extract_attr_from_node(node), text, url, title));
4359 return;
4360 }
4361
4362 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
4365 let resolved_text_inlines = text_node
4366 .as_ref()
4367 .map(|n| coalesce_inlines(inlines_from(n)))
4368 .unwrap_or_default();
4369 let text_label = text_node
4370 .as_ref()
4371 .map(|n| n.text().to_string())
4372 .unwrap_or_default();
4373
4374 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
4375 Some(rn) => {
4376 let inner = rn.text().to_string();
4377 if inner.is_empty() {
4378 (text_label.clone(), true, String::new())
4379 } else {
4380 (inner.clone(), true, inner)
4381 }
4382 }
4383 None => (text_label.clone(), false, String::new()),
4384 };
4385
4386 if let Some((url, title)) = lookup_ref(&label) {
4387 out.push(Inline::Link(
4388 extract_attr_from_node(node),
4389 resolved_text_inlines,
4390 url,
4391 title,
4392 ));
4393 return;
4394 }
4395
4396 if let Some(id) = lookup_heading_id(&label) {
4397 let url = format!("#{id}");
4398 out.push(Inline::Link(
4399 extract_attr_from_node(node),
4400 resolved_text_inlines,
4401 url,
4402 String::new(),
4403 ));
4404 return;
4405 }
4406
4407 let unresolved_text_inlines = text_node
4416 .as_ref()
4417 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
4418 .unwrap_or_default();
4419 out.push(Inline::Str("[".to_string()));
4420 out.extend(unresolved_text_inlines);
4421 let suffix = if has_second_brackets {
4422 format!("][{second_inner}]")
4423 } else {
4424 "]".to_string()
4425 };
4426 out.push(Inline::Str(suffix));
4427}
4428
4429fn render_image_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
4430 let alt_node = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
4431 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
4432 let has_dest_paren = node.children_with_tokens().any(|el| {
4433 matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::IMAGE_DEST_START
4434 || t.kind() == SyntaxKind::LINK_DEST_START)
4435 });
4436
4437 if has_dest_paren {
4438 let alt = alt_node
4439 .as_ref()
4440 .map(|n| coalesce_inlines(inlines_from(n)))
4441 .unwrap_or_default();
4442 let (url, title) = dest_node
4443 .as_ref()
4444 .map(parse_link_dest)
4445 .unwrap_or((String::new(), String::new()));
4446 out.push(Inline::Image(extract_attr_from_node(node), alt, url, title));
4447 return;
4448 }
4449
4450 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
4451 let alt_inlines = alt_node
4452 .as_ref()
4453 .map(|n| coalesce_inlines(inlines_from(n)))
4454 .unwrap_or_default();
4455 let alt_label = alt_node
4456 .as_ref()
4457 .map(|n| n.text().to_string())
4458 .unwrap_or_default();
4459
4460 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
4461 Some(rn) => {
4462 let inner = rn.text().to_string();
4463 if inner.is_empty() {
4464 (alt_label.clone(), true, String::new())
4465 } else {
4466 (inner.clone(), true, inner)
4467 }
4468 }
4469 None => (alt_label.clone(), false, String::new()),
4470 };
4471
4472 if let Some((url, title)) = lookup_ref(&label) {
4473 out.push(Inline::Image(
4474 extract_attr_from_node(node),
4475 alt_inlines,
4476 url,
4477 title,
4478 ));
4479 return;
4480 }
4481
4482 if let Some(id) = lookup_heading_id(&label) {
4483 let url = format!("#{id}");
4484 out.push(Inline::Image(
4485 extract_attr_from_node(node),
4486 alt_inlines,
4487 url,
4488 String::new(),
4489 ));
4490 return;
4491 }
4492
4493 out.push(Inline::Str("![".to_string()));
4494 out.extend(alt_inlines);
4495 let suffix = if has_second_brackets {
4496 format!("][{second_inner}]")
4497 } else {
4498 "]".to_string()
4499 };
4500 out.push(Inline::Str(suffix));
4501}
4502
/// Normalises the raw content of an inline code span.
///
/// Every newline is turned into a single space, then leading and trailing
/// whitespace is removed. Interior runs of spaces are left untouched.
fn strip_inline_code_padding(s: &str) -> String {
    let single_line = s.replace('\n', " ");
    single_line.trim().to_string()
}
4511
4512fn math_inline(node: &SyntaxNode, kind: &'static str) -> Inline {
4513 let content = crate::syntax::math::math_content_text(node);
4517 Inline::Math(kind, content)
4518}
4519
4520fn autolink_inline(node: &SyntaxNode) -> Inline {
4521 let mut url = String::new();
4522 for el in node.children_with_tokens() {
4523 if let NodeOrToken::Token(t) = el
4524 && t.kind() == SyntaxKind::TEXT
4525 {
4526 url.push_str(t.text());
4527 }
4528 }
4529 let is_email = !url.contains("://") && !url.starts_with("mailto:") && url.contains('@');
4532 if is_email {
4533 let attr = Attr {
4534 id: String::new(),
4535 classes: vec!["email".to_string()],
4536 kvs: Vec::new(),
4537 };
4538 let dest = format!("mailto:{url}");
4539 return Inline::Link(attr, vec![Inline::Str(url)], dest, String::new());
4540 }
4541 if !is_known_uri_scheme(&url) {
4545 return Inline::RawInline("html".to_string(), node.text().to_string());
4546 }
4547 let attr = Attr {
4548 id: String::new(),
4549 classes: vec!["uri".to_string()],
4550 kvs: Vec::new(),
4551 };
4552 Inline::Link(attr, vec![Inline::Str(url.clone())], url, String::new())
4553}
4554
4555fn is_known_uri_scheme(url: &str) -> bool {
4558 let scheme_end = url.find(':');
4559 let Some(end) = scheme_end else {
4560 return false;
4561 };
4562 let scheme = url[..end].to_ascii_lowercase();
4563 PANDOC_KNOWN_SCHEMES.binary_search(&scheme.as_str()).is_ok()
4564}
4565
/// URI schemes recognised by `is_known_uri_scheme`.
///
/// All entries are lowercase. The slice MUST stay sorted in ascending byte
/// order with no duplicates, because lookups use `binary_search`. Rows are
/// grouped by leading letter purely for readability.
#[rustfmt::skip]
const PANDOC_KNOWN_SCHEMES: &[&str] = &[
    // a
    "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs", "aim", "appdata",
    "apt", "attachment", "aw",
    // b
    "barion", "beshare", "bitcoin", "blob", "bolo", "browserext",
    // c
    "callto", "cap", "chrome", "chrome-extension", "cid", "coap", "coaps",
    "com-eventbrite-attendee", "content", "crid", "cvs",
    // d
    "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle", "dns", "dntp", "doi",
    "dtn", "dvb",
    // e
    "ed2k", "example",
    // f
    "facetime", "fax", "feed", "feedready", "file", "filesystem", "finger", "fish", "ftp",
    // g
    "gemini", "geo", "gg", "git", "gizmoproject", "go", "gopher", "graph", "gtalk",
    // h
    "h323", "ham", "hcp", "http", "https", "hxxp", "hxxps", "hydrazone",
    // i
    "iax", "icap", "icon", "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6",
    "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "isbn", "isostore", "itms",
    // j
    "jabber", "jar", "javascript", "jms",
    // k
    "keyparc",
    // l
    "lastfm", "ldap", "ldaps", "lvlt",
    // m (before the "ms-" family)
    "magnet", "mailserver", "mailto", "maps", "market", "message", "mid", "mms", "modem",
    "mongodb", "moz",
    // ms-*
    "ms-access", "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel",
    "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath", "ms-media-stream-id",
    "ms-officeapp", "ms-powerpoint", "ms-project", "ms-publisher", "ms-search-repair",
    "ms-secondary-screen-controller", "ms-secondary-screen-setup",
    // ms-settings*
    "ms-settings", "ms-settings-airplanemode", "ms-settings-bluetooth", "ms-settings-camera",
    "ms-settings-cellular", "ms-settings-cloudstorage", "ms-settings-connectabledevices",
    "ms-settings-displays-topology", "ms-settings-emailandaccounts", "ms-settings-language",
    "ms-settings-location", "ms-settings-lock", "ms-settings-nfctransactions",
    "ms-settings-notifications", "ms-settings-power", "ms-settings-privacy",
    "ms-settings-proximity", "ms-settings-screenrotation", "ms-settings-wifi",
    "ms-settings-workplace",
    // ms-* (after "ms-settings")
    "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad", "ms-visio", "ms-walk-to",
    "ms-whiteboard", "ms-whiteboard-cmd", "ms-word",
    // m (after the "ms-" family)
    "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn",
    // n
    "news", "nfs", "ni", "nih", "nntp", "notes",
    // o
    "ocf", "oid", "onenote", "onenote-cmd", "opaquelocktoken",
    // p
    "pack", "palm", "paparazzi", "pkcs11", "platform", "pmid", "pop", "pres", "prospero", "proxy",
    "psyc", "pwid",
    // q
    "qb", "query",
    // r
    "redis", "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp", "rtsp",
    "rtsps", "rtspu",
    // s
    "secondlife", "service", "session", "sftp", "sgn", "shttp", "sieve", "sip", "sips", "skype",
    "smb", "sms", "smtp", "snews", "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh",
    "steam", "stun", "stuns", "submit", "svn",
    // t
    "tag", "teamspeak", "tel", "teliaeid", "telnet", "tftp", "things", "thismessage", "tip",
    "tn3270", "tool", "turn", "turns", "tv",
    // u
    "udp", "unreal", "urn", "ut2004",
    // v
    "v-event", "vemmi", "ventrilo", "videotex", "view-source", "vnc",
    // w
    "wais", "webcal", "wpid", "ws", "wss", "wtai", "wyciwyg",
    // x
    "xcon", "xcon-userid", "xfire", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri",
    // y
    "ymsgr",
    // z
    "z39.50", "z39.50r", "z39.50s",
];
4616
4617fn footnote_reference_inline(node: &SyntaxNode) -> Inline {
4618 let Some(label) = footnote_label(node) else {
4619 return Inline::Unsupported("FOOTNOTE_REFERENCE".to_string());
4620 };
4621 let blocks = REFS_CTX.with(|c| {
4622 c.borrow()
4623 .footnotes
4624 .get(&label)
4625 .map(|bs| bs.iter().map(clone_block).collect::<Vec<_>>())
4626 });
4627 match blocks {
4628 Some(bs) => Inline::Note(bs),
4629 None => Inline::Str(node.text().to_string()),
4632 }
4633}
4634
4635fn inline_footnote_inline(node: &SyntaxNode) -> Inline {
4636 let inlines = coalesce_inlines(inlines_from(node));
4637 if inlines.is_empty() {
4638 Inline::Note(Vec::new())
4639 } else {
4640 Inline::Note(vec![Block::Para(inlines)])
4641 }
4642}
4643
4644fn parse_link_dest(node: &SyntaxNode) -> (String, String) {
4645 let raw = node.text().to_string();
4649 let trimmed = raw.trim();
4650 if let Some(rest) = trimmed.strip_prefix('<')
4653 && let Some(end) = rest.find('>')
4654 {
4655 let url = &rest[..end];
4656 let after = rest[end + 1..].trim();
4657 let title = parse_dest_title(after);
4658 return (escape_link_dest(url), title);
4659 }
4660 let bytes = trimmed.as_bytes();
4664 let mut url_end = trimmed.len();
4665 let mut i = 0;
4666 while i < bytes.len() {
4667 if matches!(bytes[i], b' ' | b'\t' | b'\n') {
4668 let mut j = i;
4669 while j < bytes.len() && matches!(bytes[j], b' ' | b'\t' | b'\n') {
4670 j += 1;
4671 }
4672 if j < bytes.len() && matches!(bytes[j], b'"' | b'\'' | b'(') {
4673 url_end = i;
4674 break;
4675 }
4676 i = j;
4677 } else {
4678 i += 1;
4679 }
4680 }
4681 let url_raw = &trimmed[..url_end];
4682 let title = parse_dest_title(trimmed[url_end..].trim());
4683 (escape_link_dest(url_raw), title)
4684}
4685
/// Percent-encodes the characters of a link destination that need escaping.
///
/// Escaped are all whitespace characters and `<`, `>`, `|`, `"`, `{`, `}`,
/// `[`, `]`, `^` and `` ` ``. Each is replaced by `%XX` for every byte of
/// its UTF-8 encoding, using uppercase hex digits. Everything else is
/// copied through unchanged.
fn escape_link_dest(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        let reserved = matches!(
            ch,
            '<' | '>' | '|' | '"' | '{' | '}' | '[' | ']' | '^' | '`'
        );
        if !(reserved || ch.is_whitespace()) {
            out.push(ch);
            continue;
        }
        for &byte in ch.encode_utf8(&mut utf8).as_bytes() {
            out.push('%');
            out.push(char::from(HEX[usize::from(byte >> 4)]));
            out.push(char::from(HEX[usize::from(byte & 0x0f)]));
        }
    }
    out
}
4708
/// Extracts a link title from `s`.
///
/// `s` must begin with `"`, `'` or `(`; the title is the text between that
/// opener and the LAST occurrence of the matching closer (`"`, `'` or `)`).
/// Anything after the closer is ignored. An empty string is returned when
/// `s` is empty, starts with another character, or has no closer.
fn parse_dest_title(s: &str) -> String {
    let closer = match s.chars().next() {
        Some('"') => '"',
        Some('\'') => '\'',
        Some('(') => ')',
        _ => return String::new(),
    };
    // The opener is a single ASCII byte, so byte index 1 is a char boundary.
    let after_opener = &s[1..];
    after_opener
        .rfind(closer)
        .map(|end| after_opener[..end].to_string())
        .unwrap_or_default()
}
4728
4729fn coalesce_inlines(input: Vec<Inline>) -> Vec<Inline> {
4732 coalesce_inlines_inner(input, true)
4733}
4734
4735fn coalesce_inlines_keep_edges(input: Vec<Inline>) -> Vec<Inline> {
4740 coalesce_inlines_inner(input, false)
4741}
4742
4743fn coalesce_inlines_inner(input: Vec<Inline>, trim_edges: bool) -> Vec<Inline> {
4744 let mut out: Vec<Inline> = Vec::with_capacity(input.len());
4745 for inline in input {
4746 if let Inline::Str(s) = inline {
4747 if let Some(Inline::Str(prev)) = out.last_mut() {
4748 prev.push_str(&s);
4749 } else {
4750 out.push(Inline::Str(s));
4751 }
4752 } else if let Inline::Space = inline {
4753 if matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4756 continue;
4757 }
4758 out.push(Inline::Space);
4759 } else if let Inline::SoftBreak = inline {
4760 if matches!(out.last(), Some(Inline::Space)) {
4763 out.pop();
4764 }
4765 out.push(Inline::SoftBreak);
4766 } else {
4767 out.push(inline);
4768 }
4769 }
4770 if trim_edges {
4771 while matches!(out.first(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4774 out.remove(0);
4775 }
4776 while matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4777 out.pop();
4778 }
4779 }
4780 for inline in out.iter_mut() {
4784 if let Inline::Str(s) = inline {
4785 let mut t = smart_intraword_apostrophe(s);
4786 t = smart_dashes_and_ellipsis(&t);
4787 *s = t;
4788 }
4789 }
4790 let out = smart_quote_pairs(out);
4791 apply_abbreviations(out)
4792}
4793
/// Abbreviations checked by `matches_abbreviation_suffix`.
///
/// Matching is case-sensitive and every entry ends with a period. Rows are
/// grouped for readability only; lookups scan the whole slice, so order does
/// not affect results.
#[rustfmt::skip]
const PANDOC_ABBREVIATIONS: &[&str] = &[
    // Capitalised, A-G
    "Apr.", "Aug.", "Bros.", "Capt.", "Co.", "Corp.", "Dec.", "Dr.", "Feb.", "Fr.", "Gen.", "Gov.",
    // Capitalised, H-M
    "Hon.", "Inc.", "Jan.", "Jr.", "Jul.", "Jun.", "Ltd.", "M.A.", "M.D.", "Mar.", "Mr.", "Mrs.",
    "Ms.",
    // Capitalised, N-S
    "No.", "Nov.", "Oct.", "Ph.D.", "Pres.", "Prof.", "Rep.", "Rev.", "Sen.", "Sep.", "Sept.",
    "Sgt.", "Sr.", "St.",
    // Lowercase, a-c
    "aet.", "aetat.", "al.", "bk.", "c.", "cf.", "ch.", "chap.", "chs.", "col.", "cp.",
    // Lowercase, d-f
    "d.", "e.g.", "ed.", "eds.", "esp.", "f.", "fasc.", "ff.", "fig.", "fl.", "fol.", "fols.",
    // Lowercase, i-p
    "i.e.", "ill.", "incl.", "n.", "n.b.", "nn.", "p.", "pp.", "pt.",
    // Lowercase, q-v
    "q.v.", "s.v.", "s.vv.", "saec.", "sec.", "univ.", "viz.", "vol.", "vs.",
];
4808
4809fn matches_abbreviation_suffix(s: &str) -> bool {
4810 for &abbr in PANDOC_ABBREVIATIONS {
4811 if let Some(prefix) = s.strip_suffix(abbr) {
4812 if prefix.is_empty() {
4813 return true;
4814 }
4815 let last = prefix.chars().next_back().unwrap();
4816 if !last.is_alphanumeric() && last != '.' {
4817 return true;
4818 }
4819 }
4820 }
4821 false
4822}
4823
4824fn apply_abbreviations(inlines: Vec<Inline>) -> Vec<Inline> {
4834 let inlines: Vec<Inline> = inlines
4835 .into_iter()
4836 .map(|inline| match inline {
4837 Inline::Quoted(kind, content) => Inline::Quoted(kind, apply_abbreviations(content)),
4838 other => other,
4839 })
4840 .collect();
4841 let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
4842 let mut iter = inlines.into_iter().peekable();
4843 while let Some(inline) = iter.next() {
4844 if let Inline::Str(ref s) = inline
4845 && matches_abbreviation_suffix(s)
4846 && matches!(iter.peek(), Some(Inline::Space))
4847 {
4848 iter.next();
4850 let Inline::Str(mut new_s) = inline else {
4851 unreachable!()
4852 };
4853 new_s.push('\u{a0}');
4854 if let Some(Inline::Str(_)) = iter.peek()
4856 && let Some(Inline::Str(next_s)) = iter.next()
4857 {
4858 new_s.push_str(&next_s);
4859 }
4860 out.push(Inline::Str(new_s));
4861 } else {
4862 out.push(inline);
4863 }
4864 }
4865 out
4866}
4867
4868fn smart_quote_pairs(inlines: Vec<Inline>) -> Vec<Inline> {
4869 fn is_boundary(prev: Option<&Inline>) -> bool {
4877 match prev {
4878 None => true,
4879 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4880 Some(Inline::Str(s)) => s.chars().last().is_some_and(|c| !c.is_alphanumeric()),
4881 _ => false,
4882 }
4883 }
4884 let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
4885 let n = inlines.len();
4886 let mut consumed = vec![false; n];
4887 for i in 0..n {
4888 if consumed[i] {
4889 continue;
4890 }
4891 let Inline::Str(s) = &inlines[i] else {
4893 out.push(clone_inline(&inlines[i]));
4894 consumed[i] = true;
4895 continue;
4896 };
4897 let first = s.chars().next();
4898 let quote = match first {
4899 Some('"') => Some('"'),
4900 Some('\'') => Some('\''),
4901 _ => None,
4902 };
4903 let prev_is_boundary = is_boundary(out.last());
4909 let str_has_more = s.chars().count() > 1;
4910 let next_char_is_word = s.chars().nth(1).is_some_and(|c| !c.is_whitespace());
4911 let next_is_markup_atom = matches!(
4912 inlines.get(i + 1),
4913 Some(
4914 Inline::Emph(_)
4915 | Inline::Strong(_)
4916 | Inline::Strikeout(_)
4917 | Inline::Superscript(_)
4918 | Inline::Subscript(_)
4919 | Inline::Code(_, _)
4920 )
4921 );
4922 let attaches =
4923 (str_has_more && next_char_is_word) || (!str_has_more && next_is_markup_atom);
4924 if let Some(q) = quote
4925 && prev_is_boundary
4926 && attaches
4927 {
4928 if let Some(close_idx) = find_matching_close(&inlines, i, q, &consumed) {
4930 let kind = if q == '"' {
4934 "DoubleQuote"
4935 } else {
4936 "SingleQuote"
4937 };
4938 let mut content: Vec<Inline> = Vec::new();
4939 for j in i..=close_idx {
4940 if consumed[j] {
4941 continue;
4942 }
4943 let inline = &inlines[j];
4944 if j == i && j == close_idx {
4945 if let Inline::Str(s) = inline {
4947 let mut chars: Vec<char> = s.chars().collect();
4948 if chars.len() >= 2 {
4949 chars.remove(0);
4950 chars.pop();
4951 }
4952 let stripped: String = chars.into_iter().collect();
4953 if !stripped.is_empty() {
4954 content.push(Inline::Str(stripped));
4955 }
4956 }
4957 } else if j == i {
4958 if let Inline::Str(s) = inline {
4959 let stripped: String = s.chars().skip(1).collect();
4960 if !stripped.is_empty() {
4961 content.push(Inline::Str(stripped));
4962 }
4963 }
4964 } else if j == close_idx {
4965 if let Inline::Str(s) = inline {
4966 let mut stripped: String = s.chars().collect();
4967 stripped.pop();
4968 if !stripped.is_empty() {
4969 content.push(Inline::Str(stripped));
4970 }
4971 }
4972 } else {
4973 content.push(clone_inline(inline));
4974 }
4975 consumed[j] = true;
4976 }
4977 out.push(Inline::Quoted(kind, content));
4978 continue;
4979 }
4980 }
4981 out.push(clone_inline(&inlines[i]));
4982 consumed[i] = true;
4983 }
4984 out
4985}
4986
4987fn find_matching_close(
4988 inlines: &[Inline],
4989 open_idx: usize,
4990 quote: char,
4991 consumed: &[bool],
4992) -> Option<usize> {
4993 if let Inline::Str(s) = &inlines[open_idx]
4995 && s.chars().count() >= 3
4996 && s.ends_with(quote)
4997 {
4998 let next = inlines.get(open_idx + 1);
5000 let after_is_boundary = match next {
5001 None => true,
5002 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
5003 Some(Inline::Str(s)) => s.chars().next().is_some_and(|c| !c.is_alphanumeric()),
5004 _ => false,
5005 };
5006 if after_is_boundary {
5007 return Some(open_idx);
5008 }
5009 }
5010 let n = inlines.len();
5013 let mut j = open_idx + 1;
5014 while j < n {
5015 if consumed[j] {
5016 return None;
5017 }
5018 match &inlines[j] {
5019 Inline::Str(s) => {
5020 if s.ends_with(quote) {
5021 let next = inlines.get(j + 1);
5022 let after_is_boundary = match next {
5023 None => true,
5024 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
5025 Some(Inline::Str(s)) => {
5026 s.chars().next().is_some_and(|c| !c.is_alphanumeric())
5027 }
5028 _ => false,
5029 };
5030 if after_is_boundary {
5031 return Some(j);
5032 }
5033 }
5034 }
5035 Inline::Space | Inline::SoftBreak | Inline::LineBreak => {}
5036 _ => {}
5038 }
5039 j += 1;
5040 if j - open_idx > 32 {
5042 return None;
5043 }
5044 }
5045 None
5046}
5047
5048fn clone_inline(inline: &Inline) -> Inline {
5049 match inline {
5050 Inline::Str(s) => Inline::Str(s.clone()),
5051 Inline::Space => Inline::Space,
5052 Inline::SoftBreak => Inline::SoftBreak,
5053 Inline::LineBreak => Inline::LineBreak,
5054 Inline::Emph(c) => Inline::Emph(c.iter().map(clone_inline).collect()),
5055 Inline::Strong(c) => Inline::Strong(c.iter().map(clone_inline).collect()),
5056 Inline::Strikeout(c) => Inline::Strikeout(c.iter().map(clone_inline).collect()),
5057 Inline::Superscript(c) => Inline::Superscript(c.iter().map(clone_inline).collect()),
5058 Inline::Subscript(c) => Inline::Subscript(c.iter().map(clone_inline).collect()),
5059 Inline::Code(a, s) => Inline::Code(a.clone(), s.clone()),
5060 Inline::Link(a, t, u, ti) => Inline::Link(
5061 a.clone(),
5062 t.iter().map(clone_inline).collect(),
5063 u.clone(),
5064 ti.clone(),
5065 ),
5066 Inline::Image(a, t, u, ti) => Inline::Image(
5067 a.clone(),
5068 t.iter().map(clone_inline).collect(),
5069 u.clone(),
5070 ti.clone(),
5071 ),
5072 Inline::Math(k, c) => Inline::Math(k, c.clone()),
5073 Inline::Span(a, c) => Inline::Span(a.clone(), c.iter().map(clone_inline).collect()),
5074 Inline::RawInline(f, c) => Inline::RawInline(f.clone(), c.clone()),
5075 Inline::Quoted(k, c) => Inline::Quoted(k, c.iter().map(clone_inline).collect()),
5076 Inline::Note(blocks) => Inline::Note(blocks.iter().map(clone_block).collect()),
5077 Inline::Cite(citations, text) => Inline::Cite(
5078 citations
5079 .iter()
5080 .map(|c| Citation {
5081 id: c.id.clone(),
5082 prefix: c.prefix.iter().map(clone_inline).collect(),
5083 suffix: c.suffix.iter().map(clone_inline).collect(),
5084 mode: c.mode,
5085 note_num: c.note_num,
5086 hash: c.hash,
5087 })
5088 .collect(),
5089 text.iter().map(clone_inline).collect(),
5090 ),
5091 Inline::Unsupported(s) => Inline::Unsupported(s.clone()),
5092 }
5093}
5094
5095fn clone_block(b: &Block) -> Block {
5096 match b {
5097 Block::Para(c) => Block::Para(c.iter().map(clone_inline).collect()),
5098 Block::Plain(c) => Block::Plain(c.iter().map(clone_inline).collect()),
5099 Block::Header(lvl, a, c) => {
5100 Block::Header(*lvl, a.clone(), c.iter().map(clone_inline).collect())
5101 }
5102 Block::BlockQuote(blocks) => Block::BlockQuote(blocks.iter().map(clone_block).collect()),
5103 Block::CodeBlock(a, s) => Block::CodeBlock(a.clone(), s.clone()),
5104 Block::HorizontalRule => Block::HorizontalRule,
5105 Block::BulletList(items) => Block::BulletList(
5106 items
5107 .iter()
5108 .map(|item| item.iter().map(clone_block).collect())
5109 .collect(),
5110 ),
5111 Block::OrderedList(start, style, delim, items) => Block::OrderedList(
5112 *start,
5113 style,
5114 delim,
5115 items
5116 .iter()
5117 .map(|item| item.iter().map(clone_block).collect())
5118 .collect(),
5119 ),
5120 Block::RawBlock(f, c) => Block::RawBlock(f.clone(), c.clone()),
5121 Block::Table(_) => Block::Unsupported("Table".to_string()),
5122 Block::Div(a, blocks) => Block::Div(a.clone(), blocks.iter().map(clone_block).collect()),
5123 Block::LineBlock(lines) => Block::LineBlock(
5124 lines
5125 .iter()
5126 .map(|line| line.iter().map(clone_inline).collect())
5127 .collect(),
5128 ),
5129 Block::DefinitionList(items) => Block::DefinitionList(
5130 items
5131 .iter()
5132 .map(|(term, defs)| {
5133 (
5134 term.iter().map(clone_inline).collect(),
5135 defs.iter()
5136 .map(|d| d.iter().map(clone_block).collect())
5137 .collect(),
5138 )
5139 })
5140 .collect(),
5141 ),
5142 Block::Figure(a, caption, body) => Block::Figure(
5143 a.clone(),
5144 caption.iter().map(clone_block).collect(),
5145 body.iter().map(clone_block).collect(),
5146 ),
5147 Block::Unsupported(s) => Block::Unsupported(s.clone()),
5148 }
5149}
5150
5151fn smart_dashes_and_ellipsis(s: &str) -> String {
5152 if !s.contains(['-', '.']) {
5153 return s.to_string();
5154 }
5155 let bytes = s.as_bytes();
5156 let mut out = String::with_capacity(s.len());
5157 let mut i = 0usize;
5158 while i < bytes.len() {
5159 if bytes[i] == b'-' {
5160 if i + 2 < bytes.len() && bytes[i + 1] == b'-' && bytes[i + 2] == b'-' {
5161 out.push('\u{2014}');
5162 i += 3;
5163 continue;
5164 }
5165 if i + 1 < bytes.len() && bytes[i + 1] == b'-' {
5166 out.push('\u{2013}');
5167 i += 2;
5168 continue;
5169 }
5170 }
5171 if bytes[i] == b'.' && i + 2 < bytes.len() && bytes[i + 1] == b'.' && bytes[i + 2] == b'.' {
5172 out.push('\u{2026}');
5173 i += 3;
5174 continue;
5175 }
5176 let len = utf8_char_len(bytes[i]);
5178 out.push_str(&s[i..i + len]);
5179 i += len;
5180 }
5181 out
5182}
5183
/// Returns the byte length of a UTF-8 sequence, judged from its first byte.
///
/// Bytes below `0xC0` (ASCII, and also continuation bytes) give 1,
/// `0xC0..=0xDF` give 2, `0xE0..=0xEF` give 3 and everything above gives 4.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0xbf => 1,
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
5196
5197fn smart_intraword_apostrophe(s: &str) -> String {
5198 if !s.contains('\'') {
5199 return s.to_string();
5200 }
5201 let chars: Vec<char> = s.chars().collect();
5202 let mut out = String::with_capacity(s.len());
5203 for (i, &c) in chars.iter().enumerate() {
5204 if c == '\'' {
5205 let prev = i.checked_sub(1).map(|j| chars[j]);
5206 let next = chars.get(i + 1).copied();
5207 let prev_word = prev.is_some_and(is_word_char);
5208 let next_word = next.is_some_and(is_word_char);
5209 if prev_word && next_word {
5210 out.push('\u{2019}');
5211 continue;
5212 }
5213 }
5214 out.push(c);
5215 }
5216 out
5217}
5218
/// Word-character test used by the smart-apostrophe pass: true exactly for
/// Unicode alphanumeric characters.
fn is_word_char(c: char) -> bool {
    char::is_alphanumeric(c)
}
5222
5223fn inlines_to_plaintext(inlines: &[Inline]) -> String {
5224 let mut s = String::new();
5225 for i in inlines {
5226 match i {
5227 Inline::Str(t) => s.push_str(t),
5228 Inline::Space | Inline::SoftBreak => s.push(' '),
5229 Inline::LineBreak => s.push(' '),
5230 Inline::Emph(children)
5231 | Inline::Strong(children)
5232 | Inline::Strikeout(children)
5233 | Inline::Superscript(children)
5234 | Inline::Subscript(children) => s.push_str(&inlines_to_plaintext(children)),
5235 Inline::Code(_, c) => s.push_str(c),
5236 Inline::Link(_, alt, _, _) | Inline::Image(_, alt, _, _) => {
5237 s.push_str(&inlines_to_plaintext(alt))
5238 }
5239 Inline::Math(_, c) => s.push_str(c),
5240 Inline::Span(_, children) => s.push_str(&inlines_to_plaintext(children)),
5241 Inline::RawInline(_, _) => {}
5242 Inline::Quoted(_, children) => s.push_str(&inlines_to_plaintext(children)),
5243 Inline::Note(_) => {}
5244 Inline::Cite(_, text) => s.push_str(&inlines_to_plaintext(text)),
5245 Inline::Unsupported(_) => {}
5246 }
5247 }
5248 s
5249}
5250
/// Turns `text` into an identifier-style slug.
///
/// * A whitespace character becomes `-`, unless the slug is still empty or
///   already ends in `-`.
/// * Other characters are lowercased and kept only if alphanumeric or one
///   of `_`, `-`, `.`; everything else is dropped.
/// * Trailing `-` characters are removed at the end.
fn pandoc_slugify(text: &str) -> String {
    let mut slug = String::new();
    for ch in text.chars() {
        if ch.is_whitespace() {
            if !(slug.is_empty() || slug.ends_with('-')) {
                slug.push('-');
            }
        } else {
            slug.extend(
                ch.to_lowercase()
                    .filter(|lc| lc.is_alphanumeric() || matches!(lc, '_' | '-' | '.')),
            );
        }
    }
    let kept = slug.trim_end_matches('-').len();
    slug.truncate(kept);
    slug
}
5276
5277impl Attr {
5278 fn with_id(id: String) -> Self {
5279 Self {
5280 id,
5281 classes: Vec::new(),
5282 kvs: Vec::new(),
5283 }
5284 }
5285}
5286
5287fn write_block(b: &Block, out: &mut String) {
5290 match b {
5291 Block::Para(inlines) => {
5292 out.push_str("Para [");
5293 write_inline_list(inlines, out);
5294 out.push_str(" ]");
5295 }
5296 Block::Plain(inlines) => {
5297 out.push_str("Plain [");
5298 write_inline_list(inlines, out);
5299 out.push_str(" ]");
5300 }
5301 Block::Header(level, attr, inlines) => {
5302 out.push_str(&format!("Header {level} ("));
5303 write_attr(attr, out);
5304 out.push_str(") [");
5305 write_inline_list(inlines, out);
5306 out.push_str(" ]");
5307 }
5308 Block::BlockQuote(blocks) => {
5309 out.push_str("BlockQuote [");
5310 write_block_list(blocks, out);
5311 out.push_str(" ]");
5312 }
5313 Block::CodeBlock(attr, content) => {
5314 out.push_str("CodeBlock (");
5315 write_attr(attr, out);
5316 out.push_str(") ");
5317 write_haskell_string(content, out);
5318 }
5319 Block::HorizontalRule => out.push_str("HorizontalRule"),
5320 Block::BulletList(items) => {
5321 out.push_str("BulletList [");
5322 for (i, item) in items.iter().enumerate() {
5323 if i > 0 {
5324 out.push(',');
5325 }
5326 out.push_str(" [");
5327 write_block_list(item, out);
5328 out.push_str(" ]");
5329 }
5330 out.push_str(" ]");
5331 }
5332 Block::OrderedList(start, style, delim, items) => {
5333 out.push_str(&format!("OrderedList ( {start} , {style} , {delim} ) ["));
5334 for (i, item) in items.iter().enumerate() {
5335 if i > 0 {
5336 out.push(',');
5337 }
5338 out.push_str(" [");
5339 write_block_list(item, out);
5340 out.push_str(" ]");
5341 }
5342 out.push_str(" ]");
5343 }
5344 Block::RawBlock(format, content) => {
5345 out.push_str("RawBlock ( Format ");
5346 write_haskell_string(format, out);
5347 out.push_str(" ) ");
5348 write_haskell_string(content, out);
5349 }
5350 Block::Table(data) => {
5351 write_table(data, out);
5352 }
5353 Block::Div(attr, blocks) => {
5354 out.push_str("Div (");
5355 write_attr(attr, out);
5356 out.push_str(") [");
5357 write_block_list(blocks, out);
5358 out.push_str(" ]");
5359 }
5360 Block::LineBlock(lines) => {
5361 out.push_str("LineBlock [");
5362 for (i, line) in lines.iter().enumerate() {
5363 if i > 0 {
5364 out.push(',');
5365 }
5366 out.push_str(" [");
5367 write_inline_list(line, out);
5368 out.push_str(" ]");
5369 }
5370 out.push_str(" ]");
5371 }
5372 Block::DefinitionList(items) => {
5373 out.push_str("DefinitionList [");
5374 for (i, (term, defs)) in items.iter().enumerate() {
5375 if i > 0 {
5376 out.push(',');
5377 }
5378 out.push_str(" ( [");
5379 write_inline_list(term, out);
5380 out.push_str(" ] , [");
5381 for (j, def) in defs.iter().enumerate() {
5382 if j > 0 {
5383 out.push(',');
5384 }
5385 out.push_str(" [");
5386 write_block_list(def, out);
5387 out.push_str(" ]");
5388 }
5389 out.push_str(" ] )");
5390 }
5391 out.push_str(" ]");
5392 }
5393 Block::Figure(attr, caption, body) => {
5394 out.push_str("Figure (");
5395 write_attr(attr, out);
5396 out.push_str(") ( Caption Nothing [");
5397 write_block_list(caption, out);
5398 out.push_str(" ] ) [");
5399 write_block_list(body, out);
5400 out.push_str(" ]");
5401 }
5402 Block::Unsupported(name) => {
5403 out.push_str(&format!("Unsupported {name:?}"));
5404 }
5405 }
5406}
5407
5408fn write_table(data: &TableData, out: &mut String) {
5409 out.push_str("Table (");
5410 write_attr(&data.attr, out);
5411 out.push_str(") ( Caption Nothing [");
5412 if !data.caption.is_empty() {
5413 out.push_str(" Plain [");
5414 write_inline_list(&data.caption, out);
5415 out.push_str(" ]");
5416 }
5417 out.push_str(" ] ) [");
5418 for (i, align) in data.aligns.iter().enumerate() {
5419 if i > 0 {
5420 out.push(',');
5421 }
5422 let width = data.widths.get(i).copied().unwrap_or(None);
5423 match width {
5424 None => out.push_str(&format!(" ( {align} , ColWidthDefault )")),
5425 Some(w) => out.push_str(&format!(" ( {align} , ColWidth {} )", show_double(w))),
5426 }
5427 }
5428 out.push_str(" ] ( TableHead ( \"\" , [ ] , [ ] ) [");
5429 for (i, row) in data.head_rows.iter().enumerate() {
5430 if i > 0 {
5431 out.push(',');
5432 }
5433 out.push(' ');
5434 write_table_row(row, out);
5435 }
5436 out.push_str(" ] ) [ TableBody ( \"\" , [ ] , [ ] ) ( RowHeadColumns 0 ) [ ] [");
5437 for (i, row) in data.body_rows.iter().enumerate() {
5438 if i > 0 {
5439 out.push(',');
5440 }
5441 out.push(' ');
5442 write_table_row(row, out);
5443 }
5444 out.push_str(" ] ] ( TableFoot ( \"\" , [ ] , [ ] ) [");
5445 for (i, row) in data.foot_rows.iter().enumerate() {
5446 if i > 0 {
5447 out.push(',');
5448 }
5449 out.push(' ');
5450 write_table_row(row, out);
5451 }
5452 out.push_str(" ] )");
5453}
5454
5455fn write_table_row(cells: &[GridCell], out: &mut String) {
5456 out.push_str("Row ( \"\" , [ ] , [ ] ) [");
5457 for (i, cell) in cells.iter().enumerate() {
5458 if i > 0 {
5459 out.push(',');
5460 }
5461 out.push_str(&format!(
5462 " Cell ( \"\" , [ ] , [ ] ) AlignDefault ( RowSpan {} ) ( ColSpan {} ) [",
5463 cell.row_span, cell.col_span
5464 ));
5465 if !cell.blocks.is_empty() {
5466 write_block_list(&cell.blocks, out);
5467 }
5468 out.push_str(" ]");
5469 }
5470 out.push_str(" ]");
5471}
5472
5473fn write_block_list(blocks: &[Block], out: &mut String) {
5474 for (i, b) in blocks.iter().enumerate() {
5475 if i > 0 {
5476 out.push(',');
5477 }
5478 out.push(' ');
5479 write_block(b, out);
5480 }
5481}
5482
5483fn write_inline_list(inlines: &[Inline], out: &mut String) {
5484 for (i, inline) in inlines.iter().enumerate() {
5485 if i > 0 {
5486 out.push(',');
5487 }
5488 out.push(' ');
5489 write_inline(inline, out);
5490 }
5491}
5492
5493fn write_inline(inline: &Inline, out: &mut String) {
5494 match inline {
5495 Inline::Str(s) => {
5496 out.push_str("Str ");
5497 write_haskell_string(s, out);
5498 }
5499 Inline::Space => out.push_str("Space"),
5500 Inline::SoftBreak => out.push_str("SoftBreak"),
5501 Inline::LineBreak => out.push_str("LineBreak"),
5502 Inline::Emph(children) => {
5503 out.push_str("Emph [");
5504 write_inline_list(children, out);
5505 out.push_str(" ]");
5506 }
5507 Inline::Strong(children) => {
5508 out.push_str("Strong [");
5509 write_inline_list(children, out);
5510 out.push_str(" ]");
5511 }
5512 Inline::Strikeout(children) => {
5513 out.push_str("Strikeout [");
5514 write_inline_list(children, out);
5515 out.push_str(" ]");
5516 }
5517 Inline::Superscript(children) => {
5518 out.push_str("Superscript [");
5519 write_inline_list(children, out);
5520 out.push_str(" ]");
5521 }
5522 Inline::Subscript(children) => {
5523 out.push_str("Subscript [");
5524 write_inline_list(children, out);
5525 out.push_str(" ]");
5526 }
5527 Inline::Code(attr, content) => {
5528 out.push_str("Code (");
5529 write_attr(attr, out);
5530 out.push_str(") ");
5531 write_haskell_string(content, out);
5532 }
5533 Inline::Link(attr, text, url, title) => {
5534 out.push_str("Link (");
5535 write_attr(attr, out);
5536 out.push_str(") [");
5537 write_inline_list(text, out);
5538 out.push_str(" ] ( ");
5539 write_haskell_string(url, out);
5540 out.push_str(" , ");
5541 write_haskell_string(title, out);
5542 out.push_str(" )");
5543 }
5544 Inline::Image(attr, alt, url, title) => {
5545 out.push_str("Image (");
5546 write_attr(attr, out);
5547 out.push_str(") [");
5548 write_inline_list(alt, out);
5549 out.push_str(" ] ( ");
5550 write_haskell_string(url, out);
5551 out.push_str(" , ");
5552 write_haskell_string(title, out);
5553 out.push_str(" )");
5554 }
5555 Inline::Math(kind, content) => {
5556 out.push_str("Math ");
5557 out.push_str(kind);
5558 out.push(' ');
5559 write_haskell_string(content, out);
5560 }
5561 Inline::Span(attr, children) => {
5562 out.push_str("Span (");
5563 write_attr(attr, out);
5564 out.push_str(") [");
5565 write_inline_list(children, out);
5566 out.push_str(" ]");
5567 }
5568 Inline::RawInline(format, content) => {
5569 out.push_str("RawInline ( Format ");
5570 write_haskell_string(format, out);
5571 out.push_str(" ) ");
5572 write_haskell_string(content, out);
5573 }
5574 Inline::Quoted(kind, children) => {
5575 out.push_str("Quoted ");
5576 out.push_str(kind);
5577 out.push_str(" [");
5578 write_inline_list(children, out);
5579 out.push_str(" ]");
5580 }
5581 Inline::Note(blocks) => {
5582 out.push_str("Note [");
5583 write_block_list(blocks, out);
5584 out.push_str(" ]");
5585 }
5586 Inline::Cite(citations, text) => {
5587 out.push_str("Cite [");
5588 for (i, c) in citations.iter().enumerate() {
5589 if i > 0 {
5590 out.push(',');
5591 }
5592 out.push_str(" Citation { citationId = ");
5593 write_haskell_string(&c.id, out);
5594 out.push_str(" , citationPrefix = [");
5595 write_inline_list(&c.prefix, out);
5596 out.push_str(" ] , citationSuffix = [");
5597 write_inline_list(&c.suffix, out);
5598 out.push_str(" ] , citationMode = ");
5599 out.push_str(match c.mode {
5600 CitationMode::AuthorInText => "AuthorInText",
5601 CitationMode::NormalCitation => "NormalCitation",
5602 CitationMode::SuppressAuthor => "SuppressAuthor",
5603 });
5604 out.push_str(&format!(
5605 " , citationNoteNum = {} , citationHash = {} }}",
5606 c.note_num, c.hash
5607 ));
5608 }
5609 out.push_str(" ] [");
5610 write_inline_list(text, out);
5611 out.push_str(" ]");
5612 }
5613 Inline::Unsupported(name) => {
5614 out.push_str(&format!("Unsupported {name:?}"));
5615 }
5616 }
5617}
5618
5619fn write_attr(attr: &Attr, out: &mut String) {
5620 out.push(' ');
5621 write_haskell_string(&attr.id, out);
5622 out.push_str(" , [");
5623 for (i, c) in attr.classes.iter().enumerate() {
5624 if i > 0 {
5625 out.push(',');
5626 }
5627 out.push(' ');
5628 write_haskell_string(c, out);
5629 }
5630 if !attr.classes.is_empty() {
5631 out.push(' ');
5632 }
5633 out.push_str("] , [");
5634 for (i, (k, v)) in attr.kvs.iter().enumerate() {
5635 if i > 0 {
5636 out.push(',');
5637 }
5638 out.push_str(" ( ");
5639 write_haskell_string(k, out);
5640 out.push_str(" , ");
5641 write_haskell_string(v, out);
5642 out.push_str(" )");
5643 }
5644 if !attr.kvs.is_empty() {
5645 out.push(' ');
5646 }
5647 out.push_str("] ");
5648}
5649
/// Appends `s` to `out` as a double-quoted Haskell string literal, using the
/// same escapes as Haskell's `show`.
///
/// Escaping rules:
/// - `"` and `\` are backslash-escaped.
/// - Printable ASCII (`0x20..=0x7e`) is copied verbatim.
/// - C0 control characters use their short or mnemonic escape (`\n`, `\t`,
///   `\r`, `\a`, `\b`, `\v`, `\f`, `\NUL`, `\ESC`, ...), and DEL is `\DEL`.
///   Previously everything except `\n`, `\t` and `\r` fell through to a
///   decimal escape, which is a valid literal but not what `show` prints.
/// - Every character above DEL becomes a decimal escape such as `\233`.
/// - The empty escape `\&` is inserted where the next character would
///   otherwise be absorbed into the escape: after a decimal escape followed
///   by an ASCII digit, and after `\SO` followed by `H` (which would read
///   back as `\SOH`).
fn write_haskell_string(s: &str, out: &mut String) {
    // Escape bodies for code points 0x00..=0x1f, indexed by code point.
    const C0_ESCAPES: [&str; 32] = [
        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "a", "b", "t", "n", "v", "f", "r",
        "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB",
        "ESC", "FS", "GS", "RS", "US",
    ];

    out.push('"');
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\u{7f}' => out.push_str("\\DEL"),
            '\0'..='\u{1f}' => {
                out.push('\\');
                out.push_str(C0_ESCAPES[ch as usize]);
                // `\SO` directly followed by `H` would be lexed as `\SOH`.
                if ch == '\u{e}' && chars.peek() == Some(&'H') {
                    out.push_str("\\&");
                }
            }
            ' '..='~' => out.push(ch),
            _ => {
                out.push('\\');
                out.push_str(&(ch as u32).to_string());
                // A following digit would extend the decimal escape.
                if matches!(chars.peek(), Some(next) if next.is_ascii_digit()) {
                    out.push_str("\\&");
                }
            }
        }
    }
    out.push('"');
}
5696
5697fn attr_to_json(attr: &Attr) -> Value {
5705 let kvs: Vec<Value> = attr.kvs.iter().map(|(k, v)| json!([k, v])).collect();
5706 json!([attr.id, attr.classes, kvs])
5707}
5708
5709fn target_to_json(url: &str, title: &str) -> Value {
5710 json!([url, title])
5711}
5712
5713fn inlines_to_json(inlines: &[Inline]) -> Vec<Value> {
5714 inlines.iter().map(inline_to_json).collect()
5715}
5716
5717fn blocks_to_json(blocks: &[Block]) -> Vec<Value> {
5718 blocks.iter().map(block_to_json).collect()
5719}
5720
5721fn citation_to_json(c: &Citation) -> Value {
5722 let mode = match c.mode {
5723 CitationMode::AuthorInText => "AuthorInText",
5724 CitationMode::NormalCitation => "NormalCitation",
5725 CitationMode::SuppressAuthor => "SuppressAuthor",
5726 };
5727 json!({
5728 "citationId": c.id,
5729 "citationPrefix": inlines_to_json(&c.prefix),
5730 "citationSuffix": inlines_to_json(&c.suffix),
5731 "citationMode": { "t": mode },
5732 "citationNoteNum": c.note_num,
5733 "citationHash": c.hash,
5734 })
5735}
5736
5737fn inline_to_json(inline: &Inline) -> Value {
5738 match inline {
5739 Inline::Str(s) => json!({ "t": "Str", "c": s }),
5740 Inline::Space => json!({ "t": "Space" }),
5741 Inline::SoftBreak => json!({ "t": "SoftBreak" }),
5742 Inline::LineBreak => json!({ "t": "LineBreak" }),
5743 Inline::Emph(children) => json!({ "t": "Emph", "c": inlines_to_json(children) }),
5744 Inline::Strong(children) => json!({ "t": "Strong", "c": inlines_to_json(children) }),
5745 Inline::Strikeout(children) => {
5746 json!({ "t": "Strikeout", "c": inlines_to_json(children) })
5747 }
5748 Inline::Superscript(children) => {
5749 json!({ "t": "Superscript", "c": inlines_to_json(children) })
5750 }
5751 Inline::Subscript(children) => {
5752 json!({ "t": "Subscript", "c": inlines_to_json(children) })
5753 }
5754 Inline::Code(attr, content) => {
5755 json!({ "t": "Code", "c": [attr_to_json(attr), content] })
5756 }
5757 Inline::Link(attr, text, url, title) => json!({
5758 "t": "Link",
5759 "c": [attr_to_json(attr), inlines_to_json(text), target_to_json(url, title)],
5760 }),
5761 Inline::Image(attr, alt, url, title) => json!({
5762 "t": "Image",
5763 "c": [attr_to_json(attr), inlines_to_json(alt), target_to_json(url, title)],
5764 }),
5765 Inline::Math(kind, content) => json!({
5766 "t": "Math",
5767 "c": [{ "t": kind }, content],
5768 }),
5769 Inline::Span(attr, children) => json!({
5770 "t": "Span",
5771 "c": [attr_to_json(attr), inlines_to_json(children)],
5772 }),
5773 Inline::RawInline(format, content) => json!({
5774 "t": "RawInline",
5775 "c": [format, content],
5776 }),
5777 Inline::Quoted(kind, children) => json!({
5778 "t": "Quoted",
5779 "c": [{ "t": kind }, inlines_to_json(children)],
5780 }),
5781 Inline::Note(blocks) => json!({ "t": "Note", "c": blocks_to_json(blocks) }),
5782 Inline::Cite(citations, text) => json!({
5783 "t": "Cite",
5784 "c": [
5785 citations.iter().map(citation_to_json).collect::<Vec<_>>(),
5786 inlines_to_json(text),
5787 ],
5788 }),
5789 Inline::Unsupported(name) => json!({ "t": "Unsupported", "c": name }),
5790 }
5791}
5792
5793fn block_to_json(b: &Block) -> Value {
5794 match b {
5795 Block::Para(inlines) => json!({ "t": "Para", "c": inlines_to_json(inlines) }),
5796 Block::Plain(inlines) => json!({ "t": "Plain", "c": inlines_to_json(inlines) }),
5797 Block::Header(level, attr, inlines) => json!({
5798 "t": "Header",
5799 "c": [level, attr_to_json(attr), inlines_to_json(inlines)],
5800 }),
5801 Block::BlockQuote(blocks) => {
5802 json!({ "t": "BlockQuote", "c": blocks_to_json(blocks) })
5803 }
5804 Block::CodeBlock(attr, content) => json!({
5805 "t": "CodeBlock",
5806 "c": [attr_to_json(attr), content],
5807 }),
5808 Block::HorizontalRule => json!({ "t": "HorizontalRule" }),
5809 Block::BulletList(items) => {
5810 let items_json: Vec<Vec<Value>> = items.iter().map(|it| blocks_to_json(it)).collect();
5811 json!({ "t": "BulletList", "c": items_json })
5812 }
5813 Block::OrderedList(start, style, delim, items) => {
5814 let items_json: Vec<Vec<Value>> = items.iter().map(|it| blocks_to_json(it)).collect();
5815 json!({
5816 "t": "OrderedList",
5817 "c": [
5818 [json!(start), json!({ "t": style }), json!({ "t": delim })],
5819 items_json,
5820 ],
5821 })
5822 }
5823 Block::RawBlock(format, content) => json!({
5824 "t": "RawBlock",
5825 "c": [format, content],
5826 }),
5827 Block::Table(data) => table_to_json(data),
5828 Block::Div(attr, blocks) => json!({
5829 "t": "Div",
5830 "c": [attr_to_json(attr), blocks_to_json(blocks)],
5831 }),
5832 Block::LineBlock(lines) => {
5833 let lines_json: Vec<Vec<Value>> =
5834 lines.iter().map(|line| inlines_to_json(line)).collect();
5835 json!({ "t": "LineBlock", "c": lines_json })
5836 }
5837 Block::DefinitionList(items) => {
5838 let items_json: Vec<Value> = items
5839 .iter()
5840 .map(|(term, defs)| {
5841 let defs_json: Vec<Vec<Value>> =
5842 defs.iter().map(|d| blocks_to_json(d)).collect();
5843 json!([inlines_to_json(term), defs_json])
5844 })
5845 .collect();
5846 json!({ "t": "DefinitionList", "c": items_json })
5847 }
5848 Block::Figure(attr, caption, body) => {
5849 let caption_json = json!([Value::Null, blocks_to_json(caption)]);
5852 json!({
5853 "t": "Figure",
5854 "c": [attr_to_json(attr), caption_json, blocks_to_json(body)],
5855 })
5856 }
5857 Block::Unsupported(name) => json!({ "t": "Unsupported", "c": name }),
5858 }
5859}
5860
5861fn table_to_json(data: &TableData) -> Value {
5862 let caption_blocks: Vec<Value> = if data.caption.is_empty() {
5864 Vec::new()
5865 } else {
5866 vec![json!({ "t": "Plain", "c": inlines_to_json(&data.caption) })]
5867 };
5868 let caption_json = json!([Value::Null, caption_blocks]);
5869
5870 let colspecs: Vec<Value> = data
5873 .aligns
5874 .iter()
5875 .enumerate()
5876 .map(|(i, align)| {
5877 let width = data.widths.get(i).copied().unwrap_or(None);
5878 let width_json = match width {
5879 None => json!({ "t": "ColWidthDefault" }),
5880 Some(w) => json!({ "t": "ColWidth", "c": w }),
5881 };
5882 json!([{ "t": align }, width_json])
5883 })
5884 .collect();
5885
5886 let empty_attr = json!(["", Vec::<Value>::new(), Vec::<Value>::new()]);
5887
5888 let head_rows: Vec<Value> = data
5889 .head_rows
5890 .iter()
5891 .map(|r| table_row_to_json(r))
5892 .collect();
5893 let body_rows: Vec<Value> = data
5894 .body_rows
5895 .iter()
5896 .map(|r| table_row_to_json(r))
5897 .collect();
5898 let foot_rows: Vec<Value> = data
5899 .foot_rows
5900 .iter()
5901 .map(|r| table_row_to_json(r))
5902 .collect();
5903
5904 let table_head = json!([empty_attr, head_rows]);
5905 let table_bodies = json!([[empty_attr, 0, Vec::<Value>::new(), body_rows,]]);
5906 let table_foot = json!([empty_attr, foot_rows]);
5907
5908 json!({
5909 "t": "Table",
5910 "c": [
5911 attr_to_json(&data.attr),
5912 caption_json,
5913 colspecs,
5914 table_head,
5915 table_bodies,
5916 table_foot,
5917 ],
5918 })
5919}
5920
5921fn table_row_to_json(cells: &[GridCell]) -> Value {
5922 let empty_attr = json!(["", Vec::<Value>::new(), Vec::<Value>::new()]);
5923 let cells_json: Vec<Value> = cells
5924 .iter()
5925 .map(|cell| {
5926 json!([
5927 empty_attr,
5928 { "t": "AlignDefault" },
5929 cell.row_span,
5930 cell.col_span,
5931 blocks_to_json(&cell.blocks),
5932 ])
5933 })
5934 .collect();
5935 json!([empty_attr, cells_json])
5936}
5937
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse;
    use serde_json::Value;

    /// Parses `input`, runs it through `to_pandoc_json`, and decodes the
    /// emitted text back into a `Value` so tests can inspect its shape.
    fn json_for(input: &str) -> Value {
        let rendered = to_pandoc_json(&parse(input, None));
        serde_json::from_str(&rendered).expect("to_pandoc_json must emit valid JSON")
    }

    /// An empty document still carries the version, empty meta and no blocks.
    #[test]
    fn empty_doc_emits_envelope_with_no_blocks() {
        let doc = json_for("");
        assert_eq!(
            doc["pandoc-api-version"],
            serde_json::json!([1, 23, 1, 1])
        );
        assert_eq!(doc["meta"], serde_json::json!({}));
        assert_eq!(doc["blocks"], serde_json::json!([]));
    }

    /// A one-word paragraph becomes `Para [Str "hello"]`.
    #[test]
    fn paragraph_with_str_emits_para_str_shape() {
        let doc = json_for("hello");
        let blocks = doc["blocks"].as_array().expect("blocks is array");
        assert_eq!(blocks.len(), 1);

        let para = &blocks[0];
        assert_eq!(para["t"], "Para");

        let inlines = para["c"].as_array().expect("Para.c is array");
        assert_eq!(inlines.len(), 1);

        let only = &inlines[0];
        assert_eq!(only["t"], "Str");
        assert_eq!(only["c"], "hello");
    }

    /// Payload-free constructors such as `Space` must not have a `"c"` key.
    #[test]
    fn nullary_constructors_omit_c_key() {
        let doc = json_for("a b");
        let inlines = doc["blocks"][0]["c"].as_array().expect("Para.c is array");
        let space = inlines
            .iter()
            .find(|candidate| candidate["t"] == "Space")
            .expect("Space inline present");
        let fields = space.as_object().expect("Space is JSON object");
        assert!(
            !fields.contains_key("c"),
            "nullary constructors must omit the \"c\" key, got {space:?}",
        );
    }

    /// Header payload is `[level, [id, classes, key/value pairs], inlines]`.
    #[test]
    fn header_attr_shape_matches_pandoc_tuple() {
        let doc = json_for("# Hi {#foo .bar key=val}");
        let header = &doc["blocks"][0];
        assert_eq!(header["t"], "Header");

        let payload = header["c"].as_array().expect("Header.c is array");
        assert_eq!(payload.len(), 3);
        assert_eq!(payload[0], 1, "level");

        let attr = payload[1].as_array().expect("attr tuple");
        assert_eq!(attr[0], "foo");
        assert_eq!(attr[1], serde_json::json!(["bar"]));
        assert_eq!(attr[2], serde_json::json!([["key", "val"]]));
    }
}