1use std::cell::RefCell;
19use std::collections::{HashMap, HashSet};
20
21use crate::SyntaxNode;
22use crate::syntax::SyntaxKind;
23use rowan::NodeOrToken;
24use serde_json::{Value, json};
25
/// Version components emitted under the `"pandoc-api-version"` key of the
/// JSON document produced by `to_pandoc_json`.
const PANDOC_API_VERSION: [u32; 4] = [1, 23, 1, 1];
32
/// Per-document lookup tables gathered before blocks are rendered.
///
/// The tables are filled by the `collect_*` passes in this file and read back
/// through the thread-local `REFS_CTX` while the tree is being projected.
#[derive(Default)]
struct RefsCtx {
    /// Reference definitions: normalized label -> `(url, title)`.
    refs: HashMap<String, (String, String)>,
    /// Every final heading id recorded for the document.
    heading_ids: HashSet<String>,
    /// Final heading id keyed by the start offset of the heading node.
    heading_id_by_offset: HashMap<u32, String>,
    /// Footnote definitions: label -> parsed body blocks.
    footnotes: HashMap<String, Vec<Block>>,
    /// Example-list item label (text after `@`) -> assigned number.
    example_label_to_num: HashMap<String, usize>,
    /// Number of the first item of an example list, keyed by the list's start
    /// offset.
    example_list_start_by_offset: HashMap<u32, usize>,
    /// Note number assigned to a citation, keyed by the citation's start
    /// offset.
    cite_note_num_by_offset: HashMap<u32, i64>,
}
61
thread_local! {
    // Context of the document currently being projected on this thread.
    // `to_pandoc_ast` / `to_pandoc_json` install it before walking the tree
    // and reset it to the default afterwards.
    static REFS_CTX: RefCell<RefsCtx> = RefCell::new(RefsCtx::default());
}
65
66pub fn to_pandoc_ast(tree: &SyntaxNode) -> String {
74 let ctx = build_refs_ctx(tree);
75 REFS_CTX.with(|c| *c.borrow_mut() = ctx);
76 let blocks = blocks_from_doc(tree);
77 let mut out = String::new();
78 out.push('[');
79 for (i, b) in blocks.iter().enumerate() {
80 if i > 0 {
81 out.push(',');
82 }
83 out.push(' ');
84 write_block(b, &mut out);
85 }
86 out.push_str(" ]");
87 REFS_CTX.with(|c| *c.borrow_mut() = RefsCtx::default());
88 out
89}
90
91pub fn to_pandoc_json(tree: &SyntaxNode) -> String {
111 let ctx = build_refs_ctx(tree);
112 REFS_CTX.with(|c| *c.borrow_mut() = ctx);
113 let blocks = blocks_from_doc(tree);
114 let blocks_json: Vec<Value> = blocks.iter().map(block_to_json).collect();
115 REFS_CTX.with(|c| *c.borrow_mut() = RefsCtx::default());
116 let doc = json!({
117 "pandoc-api-version": PANDOC_API_VERSION,
118 "meta": {},
119 "blocks": blocks_json,
120 });
121 serde_json::to_string(&doc).expect("pandoc-json serialization is infallible")
122}
123
/// Builds the reference context for a stand-alone document, i.e. one with no
/// parent context to inherit from.
fn build_refs_ctx(tree: &SyntaxNode) -> RefsCtx {
    build_refs_ctx_inherited(tree, None)
}
127
/// Builds the reference context for `tree`, optionally layered on top of a
/// `parent` context.
///
/// Entries defined in `tree` win over entries inherited from `parent`;
/// inherited heading ids also seed the duplicate-id counters so that ids
/// generated for `tree` do not collide with them.
fn build_refs_ctx_inherited(tree: &SyntaxNode, parent: Option<&RefsCtx>) -> RefsCtx {
    let mut ctx = RefsCtx::default();
    collect_cite_note_nums(tree, &mut ctx);
    let mut example_counter: usize = 0;
    collect_example_numbering(tree, &mut ctx, &mut example_counter);
    // Publish the numbering tables to the thread-local context before the
    // reference/heading pass runs.
    // NOTE(review): presumably needed because that pass parses footnote
    // bodies, whose rendering reads these tables — confirm against
    // `collect_block`.
    REFS_CTX.with(|c| {
        let mut borrowed = c.borrow_mut();
        borrowed.cite_note_num_by_offset = ctx.cite_note_num_by_offset.clone();
        borrowed.example_label_to_num = ctx.example_label_to_num.clone();
        borrowed.example_list_start_by_offset = ctx.example_list_start_by_offset.clone();
    });
    // Counter per heading-id base: how many times the base has been used.
    let mut seen_ids: HashMap<String, u32> = HashMap::new();
    if let Some(p) = parent {
        for id in &p.heading_ids {
            // An inherited id of the form `<base>-<n>` pushes the counter of
            // `<base>` to at least `n + 1`.
            if let Some(idx) = id.rfind('-')
                && let Ok(n) = id[idx + 1..].parse::<u32>()
            {
                let base = id[..idx].to_string();
                let entry = seen_ids.entry(base).or_insert(0);
                *entry = (*entry).max(n + 1);
            }
            // The inherited id itself counts as used at least once.
            let entry = seen_ids.entry(id.clone()).or_insert(0);
            *entry = (*entry).max(1);
        }
    }
    collect_refs_and_headings(tree, &mut ctx, &mut seen_ids);
    if let Some(p) = parent {
        // Inherited references and footnotes only fill gaps: `or_insert_with`
        // keeps whatever `tree` already defined.
        for (k, v) in &p.refs {
            ctx.refs.entry(k.clone()).or_insert_with(|| v.clone());
        }
        for (k, v) in &p.footnotes {
            ctx.footnotes.entry(k.clone()).or_insert_with(|| v.clone());
        }
        // Heading ids are simply unioned.
        for id in &p.heading_ids {
            ctx.heading_ids.insert(id.clone());
        }
    }
    ctx
}
176
177fn collect_cite_note_nums(tree: &SyntaxNode, ctx: &mut RefsCtx) {
183 let mut footnote_def_nodes: HashMap<String, SyntaxNode> = HashMap::new();
184 for child in tree.descendants() {
185 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION
186 && let Some(label) = footnote_label(&child)
187 {
188 footnote_def_nodes.entry(label).or_insert(child);
189 }
190 }
191 let mut counter: i64 = 0;
192 for child in tree.children() {
193 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION {
194 continue;
195 }
196 visit_for_cite_nums(&child, &footnote_def_nodes, &mut counter, None, ctx);
197 }
198}
199
200fn visit_for_cite_nums(
201 node: &SyntaxNode,
202 fn_defs: &HashMap<String, SyntaxNode>,
203 counter: &mut i64,
204 in_fn: Option<i64>,
205 ctx: &mut RefsCtx,
206) {
207 for el in node.children_with_tokens() {
208 if let NodeOrToken::Node(n) = el {
209 match n.kind() {
210 SyntaxKind::CITATION => {
211 let offset: u32 = n.text_range().start().into();
212 let num = if let Some(fn_num) = in_fn {
213 fn_num
214 } else {
215 *counter += 1;
216 *counter
217 };
218 ctx.cite_note_num_by_offset.insert(offset, num);
219 }
220 SyntaxKind::FOOTNOTE_REFERENCE => {
221 if in_fn.is_none() {
222 *counter += 1;
223 let fn_num = *counter;
224 if let Some(label) = footnote_label(&n)
225 && let Some(def) = fn_defs.get(&label)
226 {
227 visit_for_cite_nums(def, fn_defs, counter, Some(fn_num), ctx);
228 }
229 }
230 }
231 _ => visit_for_cite_nums(&n, fn_defs, counter, in_fn, ctx),
232 }
233 }
234 }
235}
236
237fn collect_example_numbering(node: &SyntaxNode, ctx: &mut RefsCtx, counter: &mut usize) {
243 for child in node.children() {
244 if child.kind() == SyntaxKind::LIST && list_is_example(&child) {
245 let list_offset: u32 = child.text_range().start().into();
246 ctx.example_list_start_by_offset
247 .insert(list_offset, *counter + 1);
248 for item in child
249 .children()
250 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
251 {
252 *counter += 1;
253 if let Some(label) = example_item_label(&item) {
254 ctx.example_label_to_num.entry(label).or_insert(*counter);
255 }
256 }
257 collect_example_numbering(&child, ctx, counter);
260 } else {
261 collect_example_numbering(&child, ctx, counter);
262 }
263 }
264}
265
266fn list_is_example(list: &SyntaxNode) -> bool {
270 let Some(item) = list.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
271 return false;
272 };
273 let marker = list_item_marker_text(&item);
274 let trimmed = marker.trim();
275 let body = if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
276 inner
277 } else if let Some(inner) = trimmed.strip_suffix(')') {
278 inner
279 } else if let Some(inner) = trimmed.strip_suffix('.') {
280 inner
281 } else {
282 trimmed
283 };
284 body.starts_with('@')
285 && body[1..]
286 .chars()
287 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
288}
289
290fn list_item_marker_text(item: &SyntaxNode) -> String {
291 item.children_with_tokens()
292 .filter_map(|el| el.into_token())
293 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
294 .map(|t| t.text().to_string())
295 .unwrap_or_default()
296}
297
298fn example_item_label(item: &SyntaxNode) -> Option<String> {
301 let marker = list_item_marker_text(item);
302 let trimmed = marker.trim();
303 let body = trimmed
304 .strip_prefix('(')
305 .and_then(|s| s.strip_suffix(')'))
306 .or_else(|| trimmed.strip_suffix(')'))
307 .or_else(|| trimmed.strip_suffix('.'))
308 .unwrap_or(trimmed);
309 let label = body.strip_prefix('@')?;
310 if label.is_empty() {
311 None
312 } else {
313 Some(label.to_string())
314 }
315}
316
/// Recursively collects reference definitions, footnote definitions and
/// heading ids from `node` into `ctx`.
///
/// `seen_ids` maps a heading-id base to the number of times it has been
/// handed out so far; it is what turns a repeated `foo` into `foo-1`, `foo-2`
/// and so on.
fn collect_refs_and_headings(
    node: &SyntaxNode,
    ctx: &mut RefsCtx,
    seen_ids: &mut HashMap<String, u32>,
) {
    for child in node.children() {
        match child.kind() {
            SyntaxKind::REFERENCE_DEFINITION => {
                // The first definition of a (normalized) label wins.
                if let Some((label, url, title)) = parse_reference_def(&child) {
                    ctx.refs
                        .entry(normalize_ref_label(&label))
                        .or_insert((url, title));
                }
            }
            SyntaxKind::FOOTNOTE_DEFINITION => {
                // The first definition of a label wins; the definition's
                // children are not descended into by this pass.
                if let Some((label, blocks)) = parse_footnote_def(&child) {
                    ctx.footnotes.entry(label).or_insert(blocks);
                }
            }
            SyntaxKind::HEADING => {
                let (id, was_explicit) = heading_id_with_explicitness(&child);
                let final_id = if was_explicit {
                    // An explicit id is used verbatim. It is registered in
                    // `seen_ids` if absent, but its counter is not bumped.
                    seen_ids.entry(id.clone()).or_insert(0);
                    id
                } else {
                    let mut base = id;
                    // An empty slug falls back to "section".
                    if base.is_empty() {
                        base = "section".to_string();
                    }
                    let count = seen_ids.entry(base.clone()).or_insert(0);
                    // First use keeps the bare base; later uses get a
                    // `-<count>` suffix.
                    let id = if *count == 0 {
                        base
                    } else {
                        format!("{base}-{count}")
                    };
                    *count += 1;
                    id
                };
                if !final_id.is_empty() {
                    let offset: u32 = child.text_range().start().into();
                    ctx.heading_ids.insert(final_id.clone());
                    ctx.heading_id_by_offset.insert(offset, final_id);
                }
                collect_refs_and_headings(&child, ctx, seen_ids);
            }
            _ => collect_refs_and_headings(&child, ctx, seen_ids),
        }
    }
}
368
/// Returns the id for a heading together with a flag telling whether it was
/// written explicitly.
///
/// * `(id, true)` — the heading carries a `{...}` attribute whose parsed id is
///   non-empty.
/// * `(slug, false)` — otherwise, the slug of the heading's plain text (which
///   may be empty).
fn heading_id_with_explicitness(node: &SyntaxNode) -> (String, bool) {
    // Heading text is gathered up front, even when an explicit id ends up
    // being returned.
    let inlines = node
        .children()
        .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
        .map(|c| coalesce_inlines(inlines_from(&c)))
        .unwrap_or_default();
    // The attribute may be either a node or a token; take the first one.
    let attr = node.children_with_tokens().find_map(|el| match el {
        NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
        NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => Some(t.text().to_string()),
        _ => None,
    });
    if let Some(raw) = attr {
        let trimmed = raw.trim();
        // Only a fully braced `{...}` attribute is parsed.
        if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
            let parsed = parse_attr_block(inner);
            if !parsed.id.is_empty() {
                return (parsed.id, true);
            }
        }
    }
    (pandoc_slugify(&inlines_to_plaintext(&inlines)), false)
}
394
395fn parse_footnote_def(node: &SyntaxNode) -> Option<(String, Vec<Block>)> {
396 let label = footnote_label(node)?;
397 let mut blocks = Vec::new();
398 for child in node.children() {
399 if child.kind() == SyntaxKind::CODE_BLOCK
406 && !child
407 .children()
408 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
409 {
410 blocks.push(indented_code_block_with_extra_strip(&child, 4));
411 } else {
412 collect_block(&child, &mut blocks);
413 }
414 }
415 Some((label, blocks))
416}
417
/// Builds a code block (or raw block) from `node`, removing up to `extra`
/// leading spaces from every line on top of the usual processing.
///
/// Returns `Block::RawBlock` when the block's info string names a raw format,
/// otherwise `Block::CodeBlock`.
fn indented_code_block_with_extra_strip(node: &SyntaxNode, extra: usize) -> Block {
    let raw_format = code_block_raw_format(node);
    let attr = code_block_attr(node);
    let is_fenced = node
        .children()
        .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
    // Concatenate the text of every CODE_CONTENT child.
    let mut content = String::new();
    for child in node.children() {
        if child.kind() == SyntaxKind::CODE_CONTENT {
            content.push_str(&child.text().to_string());
        }
    }
    // Drop all trailing newlines.
    while content.ends_with('\n') {
        content.pop();
    }
    // Tabs are expanded before stripping so the strip below counts columns.
    content = content
        .split('\n')
        .map(expand_tabs_to_4)
        .collect::<Vec<_>>()
        .join("\n");
    content = strip_leading_spaces_per_line(&content, extra);
    // Unfenced blocks additionally lose their code indent.
    if !is_fenced {
        content = strip_indented_code_indent(&content);
    }
    if let Some(fmt) = raw_format {
        return Block::RawBlock(fmt, content);
    }
    Block::CodeBlock(attr, content)
}
451
/// Removes up to `n` leading space characters from every `\n`-separated line
/// of `s`. Only spaces are removed; stripping stops at the first other
/// character.
fn strip_leading_spaces_per_line(s: &str, n: usize) -> String {
    s.split('\n')
        .map(|line| {
            // Leading spaces are single bytes, so counting bytes here gives a
            // valid slice index.
            let spaces = line.bytes().take(n).take_while(|&b| b == b' ').count();
            &line[spaces..]
        })
        .collect::<Vec<&str>>()
        .join("\n")
}
463
464fn footnote_label(node: &SyntaxNode) -> Option<String> {
465 for el in node.children_with_tokens() {
466 if let NodeOrToken::Token(t) = el
467 && t.kind() == SyntaxKind::FOOTNOTE_LABEL_ID
468 {
469 return Some(t.text().to_string());
470 }
471 }
472 None
473}
474
475fn parse_reference_def(node: &SyntaxNode) -> Option<(String, String, String)> {
476 let link = node.children().find(|c| c.kind() == SyntaxKind::LINK)?;
477 let label_node = link
478 .children()
479 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)?;
480 let label = label_node.text().to_string();
481
482 let mut tail = String::new();
483 let mut after_link = false;
484 for el in node.children_with_tokens() {
485 if after_link {
486 match el {
487 NodeOrToken::Token(t) => tail.push_str(t.text()),
488 NodeOrToken::Node(n) => tail.push_str(&n.text().to_string()),
489 }
490 } else if let NodeOrToken::Node(n) = &el
491 && n.kind() == SyntaxKind::LINK
492 {
493 after_link = true;
494 }
495 }
496
497 let trimmed = tail.trim_start();
498 let rest = trimmed.strip_prefix(':')?;
499 let after_colon = rest.trim_start();
500 let (url, after_url) = parse_ref_url(after_colon);
501 let title = parse_dest_title(after_url.trim());
502 Some((unescape_label(&label), url, title))
503}
504
/// Splits the URL off the front of `s` (leading whitespace ignored).
///
/// * `<...>` form: the URL is the text between the angle brackets and the
///   remainder starts right after `>`.
/// * Otherwise the URL runs up to the first whitespace character (or the end
///   of the input) and the remainder starts at that whitespace.
fn parse_ref_url(s: &str) -> (String, &str) {
    let s = s.trim_start();
    if let Some(inner) = s.strip_prefix('<') {
        if let Some(close) = inner.find('>') {
            let (url, rest) = inner.split_at(close);
            // `rest` begins with the one-byte '>' that closed the URL.
            return (url.to_owned(), &rest[1..]);
        }
    }
    let cut = s.find(char::is_whitespace).unwrap_or_else(|| s.len());
    let (url, rest) = s.split_at(cut);
    (url.to_owned(), rest)
}
515
516fn unescape_label(label: &str) -> String {
517 let mut out = String::with_capacity(label.len());
518 let mut chars = label.chars().peekable();
519 while let Some(ch) = chars.next() {
520 if ch == '\\'
521 && let Some(&next) = chars.peek()
522 && is_ascii_punct(next)
523 {
524 out.push(next);
525 chars.next();
526 } else {
527 out.push(ch);
528 }
529 }
530 out
531}
532
/// Returns `true` when `c` is an ASCII punctuation character.
fn is_ascii_punct(c: char) -> bool {
    // `is_ascii_punctuation` is already false for every non-ASCII character.
    c.is_ascii_punctuation()
}
536
537fn normalize_ref_label(label: &str) -> String {
540 let unescaped = unescape_label(label);
541 let mut out = String::new();
542 let mut last_space = false;
543 for ch in unescaped.chars() {
544 if ch.is_whitespace() {
545 if !out.is_empty() && !last_space {
546 out.push(' ');
547 last_space = true;
548 }
549 } else {
550 for lc in ch.to_lowercase() {
551 out.push(lc);
552 }
553 last_space = false;
554 }
555 }
556 if last_space {
557 out.pop();
558 }
559 out
560}
561
562fn lookup_ref(label: &str) -> Option<(String, String)> {
563 let key = normalize_ref_label(label);
564 REFS_CTX.with(|c| c.borrow().refs.get(&key).cloned())
565}
566
567fn lookup_heading_id(label: &str) -> Option<String> {
568 let id = pandoc_slugify(&unescape_label(label));
569 if id.is_empty() {
570 return None;
571 }
572 REFS_CTX.with(|c| {
573 if c.borrow().heading_ids.contains(&id) {
574 Some(id)
575 } else {
576 None
577 }
578 })
579}
580
/// Re-tokenizes `s` and joins the tokens with single spaces, so that two
/// inputs differing only in whitespace outside string literals normalize to
/// the same text.
///
/// Tokens are:
/// * each of `[`, `]`, `(`, `)`, `,` on its own;
/// * a double-quoted string, kept verbatim including its quotes; inside it a
///   backslash skips the byte that follows, and an unterminated string runs
///   to the end of the input;
/// * any other run of bytes up to the next whitespace, bracket, comma or `"`.
pub fn normalize_native(s: &str) -> String {
    const fn is_blank(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }
    const fn is_punct(b: u8) -> bool {
        matches!(b, b'[' | b']' | b'(' | b')' | b',')
    }

    let raw = s.as_bytes();
    let len = raw.len();
    let mut pieces: Vec<&str> = Vec::new();
    let mut pos = 0usize;

    while pos < len {
        let first = raw[pos];
        if is_blank(first) {
            pos += 1;
            continue;
        }
        let begin = pos;
        if is_punct(first) {
            pos += 1;
        } else if first == b'"' {
            pos += 1;
            while pos < len {
                let current = raw[pos];
                if current == b'\\' && pos + 1 < len {
                    // Skip the backslash together with the escaped byte.
                    pos += 2;
                } else {
                    pos += 1;
                    if current == b'"' {
                        break;
                    }
                }
            }
        } else {
            while pos < len && !(is_blank(raw[pos]) || is_punct(raw[pos]) || raw[pos] == b'"') {
                pos += 1;
            }
        }
        pieces.push(&s[begin..pos]);
    }

    pieces.join(" ")
}
638
/// Block-level element of the projected document.
#[derive(Debug, Clone)]
#[allow(clippy::enum_variant_names)]
enum Block {
    /// Paragraph with its inline content.
    Para(Vec<Inline>),
    /// Inline content not wrapped in a paragraph.
    Plain(Vec<Inline>),
    /// Heading: level, attributes, inline content.
    Header(usize, Attr, Vec<Inline>),
    /// Block quote with its nested blocks.
    BlockQuote(Vec<Block>),
    /// Code block: attributes and text content.
    CodeBlock(Attr, String),
    HorizontalRule,
    /// Bullet list: one `Vec<Block>` per item.
    BulletList(Vec<Vec<Block>>),
    /// Ordered list; the last field holds one `Vec<Block>` per item.
    // NOTE(review): the leading fields are presumably start number, number
    // style and delimiter — confirm against the writer.
    OrderedList(usize, &'static str, &'static str, Vec<Vec<Block>>),
    /// Raw block: format name and raw text.
    RawBlock(String, String),
    Table(TableData),
    /// Div: attributes and nested blocks.
    Div(Attr, Vec<Block>),
    /// Line block: one inline sequence per line.
    LineBlock(Vec<Vec<Inline>>),
    // NOTE(review): presumably (term, definitions) pairs — confirm.
    DefinitionList(Vec<(Vec<Inline>, Vec<Vec<Block>>)>),
    /// Figure: attributes, caption blocks, body blocks.
    Figure(Attr, Vec<Block>, Vec<Block>),
    /// Placeholder for a syntax kind `block_from` has no mapping for; holds
    /// the kind's `Debug` rendering.
    Unsupported(String),
}
665
/// Payload of `Block::Table`.
// NOTE(review): the per-field notes below are inferred from names and types
// only; the table builders are outside this view — confirm against them.
#[derive(Debug, Clone)]
struct TableData {
    attr: Attr,
    // Caption inlines.
    caption: Vec<Inline>,
    // Presumably one alignment name per column.
    aligns: Vec<&'static str>,
    // Presumably one optional width per column.
    widths: Vec<Option<f64>>,
    // Rows are vectors of cells, grouped into head, body and foot sections.
    head_rows: Vec<Vec<GridCell>>,
    body_rows: Vec<Vec<GridCell>>,
    foot_rows: Vec<Vec<GridCell>>,
}
682
/// One table cell: its row/column span and block content.
#[derive(Debug, Clone)]
struct GridCell {
    // Span counts; `GridCell::no_span` sets both to 1.
    row_span: u32,
    col_span: u32,
    blocks: Vec<Block>,
}
692
impl GridCell {
    /// Creates a cell with `row_span` and `col_span` both set to 1 and the
    /// given `blocks` as content.
    fn no_span(blocks: Vec<Block>) -> Self {
        Self {
            row_span: 1,
            col_span: 1,
            blocks,
        }
    }
}
702
/// Inline-level element of the projected document.
#[derive(Debug, Clone)]
#[allow(clippy::enum_variant_names)]
enum Inline {
    Str(String),
    Space,
    SoftBreak,
    LineBreak,
    Emph(Vec<Inline>),
    Strong(Vec<Inline>),
    Strikeout(Vec<Inline>),
    Superscript(Vec<Inline>),
    Subscript(Vec<Inline>),
    /// Inline code: attributes and text.
    Code(Attr, String),
    // NOTE(review): presumably the same field layout as `Image` below
    // (attributes, text inlines, url, title) — confirm.
    Link(Attr, Vec<Inline>, String, String),
    /// Image: attributes, alt inlines, url, title.
    Image(Attr, Vec<Inline>, String, String),
    // NOTE(review): the `&'static str` is presumably the math kind — confirm.
    Math(&'static str, String),
    Span(Attr, Vec<Inline>),
    // NOTE(review): presumably (format, raw text), as for `Block::RawBlock` —
    // confirm.
    RawInline(String, String),
    // NOTE(review): the `&'static str` is presumably the quote kind — confirm.
    Quoted(&'static str, Vec<Inline>),
    /// Footnote with its body blocks.
    Note(Vec<Block>),
    Cite(Vec<Citation>, Vec<Inline>),
    // NOTE(review): presumably the inline counterpart of `Block::Unsupported`
    // — confirm.
    Unsupported(String),
}
726
/// One citation entry inside `Inline::Cite`.
#[derive(Debug, Clone)]
struct Citation {
    id: String,
    prefix: Vec<Inline>,
    suffix: Vec<Inline>,
    mode: CitationMode,
    // NOTE(review): presumably filled from `RefsCtx::cite_note_num_by_offset`
    // — confirm at the construction site.
    note_num: i64,
    hash: i64,
}
736
/// How a `Citation` is to be rendered.
// NOTE(review): variant meanings are not visible in this part of the file;
// the names match Pandoc's citation modes — confirm.
#[derive(Debug, Clone, Copy)]
enum CitationMode {
    AuthorInText,
    NormalCitation,
    SuppressAuthor,
}
743
/// Attribute triple attached to blocks and inlines. The default value has an
/// empty id, no classes and no key/value pairs.
#[derive(Debug, Default, Clone)]
struct Attr {
    // Identifier; an empty string means "no id".
    id: String,
    classes: Vec<String>,
    // Key/value attribute pairs, kept in a `Vec` (ordered).
    kvs: Vec<(String, String)>,
}
750
751fn blocks_from_doc(doc: &SyntaxNode) -> Vec<Block> {
754 let mut out = Vec::new();
755 for child in doc.children() {
756 collect_block(&child, &mut out);
757 }
758 out
759}
760
/// Maps a single CST node to its block, dispatching on the node kind.
///
/// Returns `None` for kinds that yield no block here (blank lines, reference
/// and footnote definitions, YAML metadata, the Pandoc title block) and for
/// tables whose builder returns `None`. Kinds without a dedicated arm become
/// `Block::Unsupported` carrying the kind's `Debug` name.
fn block_from(node: &SyntaxNode) -> Option<Block> {
    match node.kind() {
        SyntaxKind::PARAGRAPH => Some(Block::Para(coalesce_inlines(inlines_from(node)))),
        SyntaxKind::PLAIN => Some(Block::Plain(coalesce_inlines(inlines_from(node)))),
        SyntaxKind::HEADING => Some(heading_block(node)),
        SyntaxKind::BLOCK_QUOTE => Some(Block::BlockQuote(blockquote_blocks(node))),
        SyntaxKind::CODE_BLOCK => Some(code_block(node)),
        SyntaxKind::HORIZONTAL_RULE => Some(Block::HorizontalRule),
        SyntaxKind::LIST => Some(list_block(node)),
        SyntaxKind::BLANK_LINE => None,
        // Reference and footnote definitions are gathered into `RefsCtx`
        // ahead of time by `collect_refs_and_headings`.
        SyntaxKind::REFERENCE_DEFINITION => None,
        SyntaxKind::FOOTNOTE_DEFINITION => None,
        SyntaxKind::YAML_METADATA => None,
        SyntaxKind::PANDOC_TITLE_BLOCK => None,
        SyntaxKind::HTML_BLOCK => Some(html_block(node)),
        SyntaxKind::HTML_BLOCK_DIV => Some(html_div_block(node)),
        SyntaxKind::PIPE_TABLE => pipe_table(node).map(Block::Table),
        SyntaxKind::SIMPLE_TABLE => simple_table(node).map(Block::Table),
        SyntaxKind::GRID_TABLE => grid_table(node).map(Block::Table),
        SyntaxKind::MULTILINE_TABLE => multiline_table(node).map(Block::Table),
        SyntaxKind::TEX_BLOCK => Some(tex_block(node)),
        SyntaxKind::FENCED_DIV => Some(fenced_div(node)),
        SyntaxKind::LINE_BLOCK => Some(line_block(node)),
        SyntaxKind::DEFINITION_LIST => Some(definition_list(node)),
        SyntaxKind::FIGURE => Some(figure_block(node)),
        other => Some(Block::Unsupported(format!("{other:?}"))),
    }
}
797
/// Builds a `Block::Figure` from a figure node.
///
/// The caption is the image's alt text (as a single `Plain`, or empty when
/// there is no alt text) and the body is the image itself wrapped in a
/// `Plain` (or empty when no image could be rendered). A non-empty id on the
/// image is moved to the figure's attributes.
fn figure_block(node: &SyntaxNode) -> Block {
    let mut alt: Vec<Inline> = Vec::new();
    let mut image_inline: Option<Inline> = None;
    if let Some(image) = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_LINK) {
        let alt_node = image.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
        if let Some(an) = alt_node {
            alt = coalesce_inlines(inlines_from(&an));
        }
        // Render the image through the regular inline path and keep only the
        // first inline it produced.
        let mut tmp = Vec::new();
        render_image_inline(&image, &mut tmp);
        if let Some(first) = tmp.into_iter().next() {
            image_inline = Some(first);
        }
    }
    let (figure_attr, image_inline) = match image_inline {
        // `mem::take` leaves the image's own id empty after moving it to the
        // figure.
        Some(Inline::Image(mut attr, alt_inlines, url, title)) if !attr.id.is_empty() => {
            let fig_attr = Attr::with_id(std::mem::take(&mut attr.id));
            (fig_attr, Some(Inline::Image(attr, alt_inlines, url, title)))
        }
        other => (Attr::default(), other),
    };
    let caption = if alt.is_empty() {
        Vec::new()
    } else {
        vec![Block::Plain(alt)]
    };
    let body = match image_inline {
        Some(img) => vec![Block::Plain(vec![img])],
        None => Vec::new(),
    };
    Block::Figure(figure_attr, caption, body)
}
838
/// Builds a `Block::Header` from a heading node.
///
/// The id recorded for this heading's start offset in the thread-local
/// context (empty if none) is used whenever the heading's own `{...}`
/// attribute does not supply a non-empty id.
fn heading_block(node: &SyntaxNode) -> Block {
    let level = heading_level(node);
    let inlines = node
        .children()
        .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
        .map(|c| coalesce_inlines(inlines_from(&c)))
        .unwrap_or_default();
    let offset: u32 = node.text_range().start().into();
    // Id assigned during the collection pass, looked up by start offset.
    let final_id = REFS_CTX
        .with(|c| c.borrow().heading_id_by_offset.get(&offset).cloned())
        .unwrap_or_default();
    let attr = node
        .children_with_tokens()
        // The attribute may be either a node or a token; take the first one.
        .find_map(|el| match el {
            NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
            NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => {
                Some(t.text().to_string())
            }
            _ => None,
        })
        .map(|raw| {
            let trimmed = raw.trim();
            if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
                // Braced attribute: keep its classes and key/values, and fill
                // in the id only when it has none.
                let mut attr = parse_attr_block(inner);
                if attr.id.is_empty() {
                    attr.id = final_id.clone();
                }
                attr
            } else {
                // Attribute text that is not fully braced contributes nothing
                // beyond the id.
                Attr::with_id(final_id.clone())
            }
        })
        .unwrap_or_else(|| Attr::with_id(final_id));
    Block::Header(level, attr, inlines)
}
877
878fn heading_level(node: &SyntaxNode) -> usize {
879 for child in node.children() {
880 if child.kind() == SyntaxKind::ATX_HEADING_MARKER {
881 for tok in child.children_with_tokens() {
882 if let Some(t) = tok.as_token()
883 && t.kind() == SyntaxKind::ATX_HEADING_MARKER
884 {
885 return t.text().chars().filter(|&c| c == '#').count();
886 }
887 }
888 }
889 }
890 for el in node.descendants_with_tokens() {
891 if let NodeOrToken::Token(t) = el
892 && t.kind() == SyntaxKind::SETEXT_HEADING_UNDERLINE
893 {
894 return if t.text().trim_start().starts_with('=') {
895 1
896 } else {
897 2
898 };
899 }
900 }
901 1
902}
903
904fn blockquote_blocks(node: &SyntaxNode) -> Vec<Block> {
905 let mut out = Vec::new();
906 for child in node.children() {
907 collect_block(&child, &mut out);
908 }
909 out
910}
911
/// Builds a `Block::CodeBlock` — or a `Block::RawBlock` when the info string
/// names a raw format — from a code-block node.
///
/// Trailing newlines are removed from the content. Fenced blocks then get
/// their tabs expanded; unfenced blocks get their code indent stripped
/// instead.
fn code_block(node: &SyntaxNode) -> Block {
    let raw_format = code_block_raw_format(node);
    let attr = code_block_attr(node);
    let is_fenced = node
        .children()
        .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
    // Concatenate the text of every CODE_CONTENT child.
    let mut content = String::new();
    for child in node.children() {
        if child.kind() == SyntaxKind::CODE_CONTENT {
            content.push_str(&child.text().to_string());
        }
    }
    while content.ends_with('\n') {
        content.pop();
    }
    if is_fenced {
        content = content
            .split('\n')
            .map(expand_tabs_to_4)
            .collect::<Vec<_>>()
            .join("\n");
    } else {
        content = strip_indented_code_indent(&content);
    }
    if let Some(fmt) = raw_format {
        return Block::RawBlock(fmt, content);
    }
    Block::CodeBlock(attr, content)
}
946
947fn code_block_raw_format(node: &SyntaxNode) -> Option<String> {
952 let open = node
953 .children()
954 .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)?;
955 let info = open
956 .children()
957 .find(|c| c.kind() == SyntaxKind::CODE_INFO)?;
958 let raw = info.text().to_string();
959 let trimmed = raw.trim();
960 let inner = trimmed
961 .strip_prefix('{')
962 .and_then(|s| s.strip_suffix('}'))?;
963 let inner = inner.trim();
964 let format = inner.strip_prefix('=')?.trim();
965 if format.is_empty() || format.contains(char::is_whitespace) {
966 return None;
967 }
968 Some(format.to_string())
969}
970
/// Derives the attributes of a code block from its fence info string.
///
/// Recognized shapes, tried in order:
/// 1. `{...}` — parsed as an attribute block;
/// 2. `lang {...}` — attribute block with the normalized `lang` inserted as
///    the first class;
/// 3. any other non-empty text — a single class (the normalized text).
///
/// Blocks without a fence opener or info string, or with an empty info
/// string, get the default (empty) attributes.
fn code_block_attr(node: &SyntaxNode) -> Attr {
    let Some(open) = node
        .children()
        .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
    else {
        return Attr::default();
    };
    let Some(info) = open.children().find(|c| c.kind() == SyntaxKind::CODE_INFO) else {
        return Attr::default();
    };
    let raw = info.text().to_string();
    let trimmed = raw.trim();
    if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
        return parse_attr_block(inner);
    }
    // Language name followed by a braced attribute block.
    if let Some(brace) = trimmed.find('{')
        && trimmed.ends_with('}')
    {
        let lang = trimmed[..brace].trim();
        let attr_inner = &trimmed[brace + 1..trimmed.len() - 1];
        let mut attr = parse_attr_block(attr_inner);
        if !lang.is_empty() {
            attr.classes.insert(0, normalize_lang_id(lang));
        }
        return attr;
    }
    if !trimmed.is_empty() {
        return Attr {
            id: String::new(),
            classes: vec![normalize_lang_id(trimmed)],
            kvs: Vec::new(),
        };
    }
    Attr::default()
}
1008
/// Normalizes a code-block language name: ASCII-lowercases it and maps the
/// aliases `c++` -> `cpp` and `objective-c` -> `objectivec`.
fn normalize_lang_id(lang: &str) -> String {
    let lowered = lang.to_ascii_lowercase();
    let alias = match lowered.as_str() {
        "c++" => Some("cpp"),
        "objective-c" => Some("objectivec"),
        _ => None,
    };
    alias.map_or(lowered, String::from)
}
1020
/// Removes the leading code indent from every `\n`-separated line of `s`.
///
/// Each line first has its tabs expanded via `expand_tabs_to_4`; one leading
/// indent prefix is then stripped if present, and lines without it are kept
/// (tab-expanded) as they are.
fn strip_indented_code_indent(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (i, line) in s.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        let expanded = expand_tabs_to_4(line);
        let stripped = if let Some(rest) = expanded.strip_prefix(" ") {
            rest.to_string()
        // NOTE(review): `expand_tabs_to_4` replaces every tab with spaces, so
        // this tab branch looks unreachable — confirm before removing.
        } else if let Some(rest) = expanded.strip_prefix('\t') {
            rest.to_string()
        } else {
            expanded
        };
        out.push_str(&stripped);
    }
    out
}
1048
/// Replaces every tab in `line` with the number of spaces needed to reach the
/// next multiple-of-4 column. Columns are counted in characters, starting at
/// 0.
fn expand_tabs_to_4(line: &str) -> String {
    const TAB_STOP: usize = 4;
    let mut expanded = String::with_capacity(line.len());
    let mut column = 0usize;
    for ch in line.chars() {
        match ch {
            '\t' => {
                // Always 1..=4 spaces: a tab sitting on a tab stop still
                // advances by a full stop.
                let padding = TAB_STOP - column % TAB_STOP;
                expanded.extend(std::iter::repeat(' ').take(padding));
                column += padding;
            }
            other => {
                expanded.push(other);
                column += 1;
            }
        }
    }
    expanded
}
1069
1070fn html_block(node: &SyntaxNode) -> Block {
1076 let mut content = node.text().to_string();
1077 while content.ends_with('\n') {
1078 content.pop();
1079 }
1080 Block::RawBlock("html".to_string(), content)
1081}
1082
1083fn html_div_block(node: &SyntaxNode) -> Block {
1100 let attr = cst_div_open_tag_attr(node);
1101 if div_has_structural_inner(node) {
1102 let mut blocks = Vec::new();
1103 for child in node.children() {
1104 match child.kind() {
1105 SyntaxKind::HTML_BLOCK_TAG | SyntaxKind::BLANK_LINE => {}
1106 _ => collect_block(&child, &mut blocks),
1107 }
1108 }
1109 return Block::Div(attr, blocks);
1110 }
1111 debug_assert!(
1112 false,
1113 "HTML_BLOCK_DIV without structural inner shape — parser regression"
1114 );
1115 Block::Div(attr, Vec::new())
1116}
1117
/// Returns the text of `node` as gathered by `walk_skip_bq_markers`, i.e.
/// without block-quote markers, the whitespace token right after each marker,
/// and a whitespace token at the start of a line.
fn collect_html_block_text_skip_bq_markers(node: &SyntaxNode) -> String {
    let mut out = String::new();
    let mut skip_next_ws = false;
    // The walk begins at the start of a line.
    let mut at_line_start = true;
    walk_skip_bq_markers(node, &mut out, &mut skip_next_ws, &mut at_line_start);
    out
}
1141
/// Appends the token text of `node`'s subtree to `out`, filtering out
/// block-quote markup.
///
/// * `skip_next_ws` — set after a `BLOCK_QUOTE_MARKER`; the whitespace token
///   that follows it is dropped.
/// * `at_line_start` — true right after a `NEWLINE` / `BLANK_LINE` token; a
///   whitespace token seen in that state is dropped.
///
/// Both flags are threaded through the recursion so state carries across
/// node boundaries.
fn walk_skip_bq_markers(
    node: &SyntaxNode,
    out: &mut String,
    skip_next_ws: &mut bool,
    at_line_start: &mut bool,
) {
    for child in node.children_with_tokens() {
        match child {
            NodeOrToken::Node(n) => walk_skip_bq_markers(&n, out, skip_next_ws, at_line_start),
            NodeOrToken::Token(t) => {
                // Drop the marker itself and arm the whitespace skip.
                if t.kind() == SyntaxKind::BLOCK_QUOTE_MARKER {
                    *skip_next_ws = true;
                    *at_line_start = false;
                    continue;
                }
                // Whitespace directly after a marker.
                if *skip_next_ws && t.kind() == SyntaxKind::WHITESPACE {
                    *skip_next_ws = false;
                    *at_line_start = false;
                    continue;
                }
                // Whitespace at the start of a line (only the first such
                // token is dropped, since the flag is cleared here).
                if *at_line_start && t.kind() == SyntaxKind::WHITESPACE {
                    *at_line_start = false;
                    continue;
                }
                *skip_next_ws = false;
                let kind = t.kind();
                out.push_str(t.text());
                *at_line_start = kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE;
            }
        }
    }
}
1174
1175fn div_has_structural_inner(node: &SyntaxNode) -> bool {
1191 let mut tags = node
1192 .children()
1193 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG);
1194 let Some(open_tag) = tags.next() else {
1195 return false;
1196 };
1197 let close_tag = tags.next();
1202 if tags.next().is_some() {
1203 return false;
1204 }
1205 if !html_block_open_tag_is_clean(&open_tag) {
1206 return false;
1207 }
1208 if let Some(close_tag) = close_tag.as_ref()
1209 && !html_block_close_tag_is_clean(close_tag)
1210 {
1211 return false;
1212 }
1213 !node
1214 .children()
1215 .any(|c| c.kind() == SyntaxKind::HTML_BLOCK_CONTENT)
1216}
1217
1218fn html_block_open_tag_is_clean(open_tag: &SyntaxNode) -> bool {
1226 let mut seen_gt = false;
1227 for child in open_tag.children_with_tokens() {
1228 let NodeOrToken::Token(t) = child else {
1229 continue;
1232 };
1233 if !seen_gt {
1234 if t.kind() == SyntaxKind::TEXT && t.text().ends_with('>') {
1235 seen_gt = true;
1236 }
1237 } else if t.kind() != SyntaxKind::NEWLINE {
1238 return false;
1239 }
1240 }
1241 seen_gt
1242}
1243
1244fn html_block_close_tag_is_clean(close_tag: &SyntaxNode) -> bool {
1248 for child in close_tag.children_with_tokens() {
1249 if let NodeOrToken::Token(t) = child
1250 && t.kind() == SyntaxKind::TEXT
1251 {
1252 return t.text().starts_with("</");
1253 }
1254 }
1255 false
1256}
1257
1258fn cst_div_open_tag_attr(node: &SyntaxNode) -> Attr {
1266 let Some(open_tag) = node
1267 .children()
1268 .find(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG)
1269 else {
1270 return Attr::default();
1271 };
1272 let mut parts: Vec<String> = Vec::new();
1273 for child in open_tag.children() {
1274 if child.kind() == SyntaxKind::HTML_ATTRS {
1275 parts.push(child.text().to_string());
1276 }
1277 }
1278 if parts.is_empty() {
1279 return Attr::default();
1280 }
1281 parse_html_attrs(parts.join(" ").trim())
1282}
1283
/// Appends the block(s) for an HTML block node to `out`.
///
/// * Nodes with the structural shape (see `html_block_has_structural_lift`)
///   are delegated to `emit_html_block_structural`.
/// * Otherwise the node's text is gathered without block-quote markers and
///   with trailing whitespace removed. Comments, processing instructions,
///   declarations / CDATA and raw-text elements become a single raw `"html"`
///   block; anything else is handed to `split_html_block_by_tags`.
fn emit_html_block(node: &SyntaxNode, out: &mut Vec<Block>) {
    if html_block_has_structural_lift(node) {
        emit_html_block_structural(node, out);
        return;
    }
    let mut content = collect_html_block_text_skip_bq_markers(node);
    // Trim trailing newlines, carriage returns, spaces and tabs.
    while content
        .as_bytes()
        .last()
        .is_some_and(|b| matches!(b, b'\n' | b'\r' | b' ' | b'\t'))
    {
        content.pop();
    }
    // Number of leading space/tab bytes.
    let leading_ws = content
        .as_bytes()
        .iter()
        .position(|&b| b != b' ' && b != b'\t')
        .unwrap_or(content.len());
    let trimmed = &content[leading_ws..];
    // The first-line indent is dropped only when it is 1..=3 characters wide
    // and made of spaces only.
    let strip_first_line_indent = leading_ws > 0
        && leading_ws <= 3
        && content.as_bytes()[..leading_ws].iter().all(|&b| b == b' ');
    // The "<!--" and "<![CDATA[" checks are subsumed by the "<!" check.
    if trimmed.starts_with("<!--")
        || trimmed.starts_with("<?")
        || trimmed.starts_with("<![CDATA[")
        || trimmed.starts_with("<!")
        || is_raw_text_element_open(trimmed)
    {
        let raw = if strip_first_line_indent {
            content[leading_ws..].to_string()
        } else {
            content
        };
        out.push(Block::RawBlock("html".to_string(), raw));
        return;
    }
    let walker_input = if strip_first_line_indent {
        &content[leading_ws..]
    } else {
        content.as_str()
    };
    split_html_block_by_tags(walker_input, out);
}
1364
1365fn html_block_has_structural_lift(node: &SyntaxNode) -> bool {
1373 let mut tags = node
1374 .children()
1375 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG);
1376 let Some(open_tag) = tags.next() else {
1377 return false;
1378 };
1379 let Some(close_tag) = tags.next() else {
1380 return false;
1381 };
1382 if tags.next().is_some() {
1383 return false;
1384 }
1385 if !html_block_open_tag_is_clean(&open_tag) {
1386 return false;
1387 }
1388 if !html_block_close_tag_is_clean(&close_tag) {
1389 return false;
1390 }
1391 !node
1392 .children()
1393 .any(|c| c.kind() == SyntaxKind::HTML_BLOCK_CONTENT)
1394}
1395
1396fn emit_html_block_structural(node: &SyntaxNode, out: &mut Vec<Block>) {
1403 for child in node.children() {
1404 match child.kind() {
1405 SyntaxKind::HTML_BLOCK_TAG => {
1406 let text = open_tag_raw_block_text(&child);
1407 out.push(Block::RawBlock("html".to_string(), text));
1408 }
1409 SyntaxKind::BLANK_LINE => {}
1410 _ => collect_block(&child, out),
1411 }
1412 }
1413}
1414
/// Returns the text to emit as a raw HTML block for a tag node.
///
/// * Tag with `HTML_ATTRS` children: if the first `TEXT` token starting with
///   `<` already contains `>`, the text up to and including that `>` is
///   returned. Otherwise the tag is rebuilt as that token's text, followed by
///   each non-empty trimmed attribute text preceded by one space, then `>`.
/// * Tag without `HTML_ATTRS`: the tag's direct token text, minus block-quote
///   markers, the whitespace token after each marker, whitespace tokens seen
///   before any text has been kept, and trailing newlines.
fn open_tag_raw_block_text(tag: &SyntaxNode) -> String {
    let has_attrs = tag.children().any(|c| c.kind() == SyntaxKind::HTML_ATTRS);
    if has_attrs {
        let mut name_prefix: Option<String> = None;
        let mut attrs: Vec<String> = Vec::new();
        for child in tag.children_with_tokens() {
            match child {
                NodeOrToken::Token(t) if t.kind() == SyntaxKind::TEXT => {
                    let text = t.text();
                    // Only the first `<...` text token is considered.
                    if name_prefix.is_none() && text.starts_with('<') {
                        // The token already holds a complete tag.
                        if let Some(gt_idx) = text.find('>') {
                            return text[..=gt_idx].to_string();
                        }
                        name_prefix = Some(text.to_string());
                    }
                }
                NodeOrToken::Node(n) if n.kind() == SyntaxKind::HTML_ATTRS => {
                    let attr_text = n.text().to_string();
                    let trimmed = attr_text.trim();
                    if !trimmed.is_empty() {
                        attrs.push(trimmed.to_string());
                    }
                }
                _ => {}
            }
        }
        let mut result = name_prefix.unwrap_or_default();
        for attr in &attrs {
            result.push(' ');
            result.push_str(attr);
        }
        result.push('>');
        return result;
    }
    let mut text = String::new();
    let mut skip_next_ws = false;
    for child in tag.children_with_tokens() {
        if let NodeOrToken::Token(t) = child {
            if t.kind() == SyntaxKind::BLOCK_QUOTE_MARKER {
                skip_next_ws = true;
                continue;
            }
            if skip_next_ws && t.kind() == SyntaxKind::WHITESPACE {
                skip_next_ws = false;
                continue;
            }
            // Leading whitespace before any text has been kept.
            if text.is_empty() && t.kind() == SyntaxKind::WHITESPACE {
                continue;
            }
            skip_next_ws = false;
            text.push_str(t.text());
        }
    }
    while text.ends_with('\n') {
        text.pop();
    }
    text
}
1493
1494fn split_html_block_by_tags(content: &str, out: &mut Vec<Block>) {
1509 use crate::parser::blocks::html_blocks::{
1510 is_pandoc_block_tag_name, is_pandoc_inline_block_tag_name, is_pandoc_void_block_tag_name,
1511 };
1512 use crate::parser::inlines::inline_html::{parse_close_tag, parse_open_tag};
1513
1514 let bytes = content.as_bytes();
1515 let mut i = 0usize;
1516 let mut text_start = 0usize;
1517 let mut inline_pending = false;
1518 let mut consecutive_newlines = 0usize;
1519 while i < bytes.len() {
1520 let b = bytes[i];
1521 if b == b'\n' {
1522 consecutive_newlines += 1;
1523 if consecutive_newlines >= 2 {
1527 inline_pending = false;
1528 }
1529 i += 1;
1530 continue;
1531 }
1532 consecutive_newlines = 0;
1533 if b != b'<' {
1534 if !b.is_ascii_whitespace() {
1535 inline_pending = true;
1536 }
1537 i += 1;
1538 continue;
1539 }
1540 let rest = &content[i..];
1541 let open_end = parse_open_tag(rest);
1542 let close_end = parse_close_tag(rest);
1543 let Some((tag_end, is_close)) = open_end
1544 .map(|n| (n, false))
1545 .or_else(|| close_end.map(|n| (n, true)))
1546 else {
1547 inline_pending = true;
1548 i += 1;
1549 continue;
1550 };
1551 let tag_text = &rest[..tag_end];
1552 let Some(name) = extract_html_tag_name(tag_text) else {
1553 inline_pending = true;
1554 i += 1;
1555 continue;
1556 };
1557 if is_pandoc_block_tag_name(name) {
1558 if i > text_start {
1565 flush_html_block_text(&content[text_start..i], out);
1566 }
1567 out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
1568 i += tag_end;
1569 text_start = i;
1570 inline_pending = false;
1571 continue;
1572 }
1573 if is_pandoc_inline_block_tag_name(name) {
1574 if !inline_pending {
1593 if !is_close
1594 && let Some((close_start, close_end)) =
1595 find_matching_html_close_with_start(content, i, name)
1596 && !interior_starts_with_void_block_tag(content, i + tag_end)
1597 {
1598 if i > text_start {
1599 flush_html_block_text(&content[text_start..i], out);
1600 }
1601 out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
1602 let interior = &content[i + tag_end..close_start];
1603 flush_html_block_text(interior, out);
1604 let close_text = &content[close_start..close_end];
1605 out.push(Block::RawBlock("html".to_string(), close_text.to_string()));
1606 i = close_end;
1607 text_start = i;
1608 inline_pending = false;
1609 continue;
1610 }
1611 if i > text_start {
1612 flush_html_block_text(&content[text_start..i], out);
1613 }
1614 out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
1615 i += tag_end;
1616 text_start = i;
1617 inline_pending = false;
1618 continue;
1619 }
1620 inline_pending = true;
1621 i += tag_end;
1622 continue;
1623 }
1624 if is_pandoc_void_block_tag_name(name) {
1625 if !inline_pending {
1634 if i > text_start {
1635 flush_html_block_text(&content[text_start..i], out);
1636 }
1637 out.push(Block::RawBlock("html".to_string(), tag_text.to_string()));
1638 i += tag_end;
1639 text_start = i;
1640 inline_pending = false;
1641 continue;
1642 }
1643 inline_pending = true;
1644 i += tag_end;
1645 continue;
1646 }
1647 inline_pending = true;
1651 i += tag_end;
1652 }
1653 if text_start < bytes.len() {
1654 flush_html_block_tail_text(&content[text_start..], out);
1659 }
1660}
1661
1662fn flush_html_block_text(text: &str, out: &mut Vec<Block>) {
1670 if text.trim().is_empty() {
1671 return;
1672 }
1673 let trailing_blank = trailing_newlines(text) >= 2;
1674 let mut blocks = parse_pandoc_blocks(text);
1675 if blocks.is_empty() {
1676 return;
1677 }
1678 if !trailing_blank
1679 && let Some(Block::Para(_)) = blocks.last()
1680 && let Some(Block::Para(inlines)) = blocks.pop()
1681 {
1682 blocks.push(Block::Plain(inlines));
1683 }
1684 out.extend(blocks);
1685}
1686
1687fn flush_html_block_tail_text(text: &str, out: &mut Vec<Block>) {
1691 if text.trim().is_empty() {
1692 return;
1693 }
1694 let blocks = parse_pandoc_blocks(text);
1695 out.extend(blocks);
1696}
1697
/// Count the `\n` bytes at the very end of `s`.
fn trailing_newlines(s: &str) -> usize {
    s.len() - s.trim_end_matches('\n').len()
}
1701
1702fn interior_starts_with_void_block_tag(content: &str, interior_start: usize) -> bool {
1711 use crate::parser::blocks::html_blocks::is_pandoc_void_block_tag_name;
1712 use crate::parser::inlines::inline_html::parse_open_tag;
1713
1714 let bytes = content.as_bytes();
1715 let mut i = interior_start;
1716 while i < bytes.len() && matches!(bytes[i], b'\n' | b' ' | b'\t') {
1717 i += 1;
1718 }
1719 if i >= bytes.len() || bytes[i] != b'<' {
1720 return false;
1721 }
1722 let rest = &content[i..];
1723 let Some(end) = parse_open_tag(rest) else {
1724 return false;
1725 };
1726 extract_html_tag_name(&rest[..end]).is_some_and(is_pandoc_void_block_tag_name)
1727}
1728
/// Extract the tag name from the text of an open or close tag.
///
/// The text must start with `<`, optionally followed by `/`. The name is the
/// following run of ASCII alphanumerics and `-`. Returns `None` when the text
/// does not start with `<` or the name is empty.
fn extract_html_tag_name(tag_text: &str) -> Option<&str> {
    let after_lt = tag_text.strip_prefix('<')?;
    let body = after_lt.strip_prefix('/').unwrap_or(after_lt);
    let name_len = body
        .bytes()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'-')
        .count();
    if name_len == 0 {
        None
    } else {
        Some(&body[..name_len])
    }
}
1747
1748fn find_matching_html_close_with_start(
1753 content: &str,
1754 start: usize,
1755 name: &str,
1756) -> Option<(usize, usize)> {
1757 use crate::parser::inlines::inline_html::{parse_close_tag, parse_open_tag};
1758
1759 let bytes = content.as_bytes();
1760 let opener_end = parse_open_tag(&content[start..])?;
1761 let mut i = start + opener_end;
1762 let mut depth = 1usize;
1763 while i < bytes.len() {
1764 if bytes[i] != b'<' {
1765 i += 1;
1766 continue;
1767 }
1768 let rest = &content[i..];
1769 if let Some(end) = parse_open_tag(rest) {
1770 let tag = &rest[..end];
1771 if extract_html_tag_name(tag).is_some_and(|n| n.eq_ignore_ascii_case(name)) {
1772 depth += 1;
1773 }
1774 i += end;
1775 continue;
1776 }
1777 if let Some(end) = parse_close_tag(rest) {
1778 let tag = &rest[..end];
1779 if extract_html_tag_name(tag).is_some_and(|n| n.eq_ignore_ascii_case(name)) {
1780 depth -= 1;
1781 if depth == 0 {
1782 return Some((i, i + end));
1783 }
1784 }
1785 i += end;
1786 continue;
1787 }
1788 i += 1;
1789 }
1790 None
1791}
1792
/// Report whether `s` begins with the opening of a raw-text element:
/// `<script`, `<style`, `<pre` or `<textarea` (ASCII case-insensitive).
///
/// The tag name must be followed by end of input, a space, a tab, a newline,
/// `>` or `/`, so `<prefix>` is not mistaken for `<pre>`.
///
/// The comparison is done on bytes rather than by slicing the `str`, so input
/// containing multi-byte characters right after `<` can never trigger a
/// char-boundary panic.
fn is_raw_text_element_open(s: &str) -> bool {
    let Some(rest) = s.strip_prefix('<') else {
        return false;
    };
    let rest = rest.as_bytes();
    ["script", "style", "pre", "textarea"].iter().any(|tag| {
        rest.len() >= tag.len()
            && rest[..tag.len()].eq_ignore_ascii_case(tag.as_bytes())
            && matches!(
                rest.get(tag.len()),
                None | Some(b' ' | b'\t' | b'\n' | b'>' | b'/')
            )
    })
}
1820
1821fn collect_block(node: &SyntaxNode, out: &mut Vec<Block>) {
1825 if node.kind() == SyntaxKind::HTML_BLOCK_DIV {
1826 out.push(html_div_block(node));
1830 return;
1831 }
1832 if node.kind() == SyntaxKind::HTML_BLOCK {
1833 emit_html_block(node, out);
1838 return;
1839 }
1840 if let Some(b) = block_from(node) {
1841 out.push(b);
1842 }
1843}
1844
1845fn parse_pandoc_blocks(text: &str) -> Vec<Block> {
1849 if text.trim().is_empty() {
1850 return Vec::new();
1851 }
1852 let opts = crate::ParserOptions {
1853 flavor: crate::Flavor::Pandoc,
1854 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
1855 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
1856 ..crate::ParserOptions::default()
1857 };
1858 let doc = crate::parse(text, Some(opts));
1859 let outer = REFS_CTX.with(|c| std::mem::take(&mut *c.borrow_mut()));
1870 let inner_ctx = build_refs_ctx_inherited(&doc, Some(&outer));
1871 REFS_CTX.with(|c| *c.borrow_mut() = inner_ctx);
1872 let mut out = Vec::new();
1873 for child in doc.children() {
1874 collect_block(&child, &mut out);
1875 }
1876 REFS_CTX.with(|c| *c.borrow_mut() = outer);
1877 out
1878}
1879
1880fn tex_block(node: &SyntaxNode) -> Block {
1881 let mut content = node.text().to_string();
1882 while content.ends_with('\n') {
1883 content.pop();
1884 }
1885 Block::RawBlock("tex".to_string(), content)
1886}
1887
1888fn fenced_div(node: &SyntaxNode) -> Block {
1889 let attr = node
1890 .children()
1891 .find(|c| c.kind() == SyntaxKind::DIV_FENCE_OPEN)
1892 .map(|open| {
1893 let info = open
1894 .children()
1895 .find(|c| c.kind() == SyntaxKind::DIV_INFO)
1896 .map(|n| n.text().to_string())
1897 .unwrap_or_default();
1898 parse_div_info(info.trim())
1899 })
1900 .unwrap_or_default();
1901 let mut blocks = Vec::new();
1902 for child in node.children() {
1903 match child.kind() {
1904 SyntaxKind::DIV_FENCE_OPEN | SyntaxKind::DIV_FENCE_CLOSE => {}
1905 _ => collect_block(&child, &mut blocks),
1906 }
1907 }
1908 Block::Div(attr, blocks)
1909}
1910
1911fn parse_div_info(info: &str) -> Attr {
1914 if info.starts_with('{') && info.ends_with('}') {
1915 return parse_attr_block(&info[1..info.len() - 1]);
1916 }
1917 if !info.is_empty() {
1918 return Attr {
1919 id: String::new(),
1920 classes: vec![info.to_string()],
1921 kvs: Vec::new(),
1922 };
1923 }
1924 Attr::default()
1925}
1926
1927fn extract_attr_from_node(parent: &SyntaxNode) -> Attr {
1931 let raw = parent.children_with_tokens().find_map(|el| match el {
1932 NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
1933 NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => Some(t.text().to_string()),
1934 _ => None,
1935 });
1936 let Some(raw) = raw else {
1937 return Attr::default();
1938 };
1939 let trimmed = raw.trim();
1940 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
1941 parse_attr_block(inner)
1942 } else {
1943 Attr::default()
1944 }
1945}
1946
1947fn parse_attr_block(s: &str) -> Attr {
1951 let mut id = String::new();
1952 let mut classes: Vec<String> = Vec::new();
1953 let mut kvs: Vec<(String, String)> = Vec::new();
1954 let bytes = s.as_bytes();
1955 let mut i = 0usize;
1956 while i < bytes.len() {
1957 match bytes[i] {
1958 b' ' | b'\t' | b'\n' | b'\r' => {
1959 i += 1;
1960 }
1961 b'#' => {
1962 let start = i + 1;
1963 let mut j = start;
1964 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
1965 j += 1;
1966 }
1967 id = s[start..j].to_string();
1968 i = j;
1969 }
1970 b'.' => {
1971 let start = i + 1;
1972 let mut j = start;
1973 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
1974 j += 1;
1975 }
1976 classes.push(s[start..j].to_string());
1977 i = j;
1978 }
1979 _ => {
1980 let key_start = i;
1982 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
1983 i += 1;
1984 }
1985 let key = s[key_start..i].to_string();
1986 if i < bytes.len() && bytes[i] == b'=' {
1987 i += 1;
1988 let value = if i < bytes.len() && bytes[i] == b'"' {
1989 i += 1;
1990 let v_start = i;
1991 while i < bytes.len() && bytes[i] != b'"' {
1992 i += 1;
1993 }
1994 let v = s[v_start..i].to_string();
1995 if i < bytes.len() {
1996 i += 1;
1997 }
1998 v
1999 } else {
2000 let v_start = i;
2001 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
2002 i += 1;
2003 }
2004 s[v_start..i].to_string()
2005 };
2006 kvs.push((key, value));
2007 } else if !key.is_empty() {
2008 classes.push(key);
2010 }
2011 }
2012 }
2013 }
2014 Attr { id, classes, kvs }
2015}
2016
2017fn parse_html_attrs(s: &str) -> Attr {
2020 let mut id = String::new();
2021 let mut classes: Vec<String> = Vec::new();
2022 let mut kvs: Vec<(String, String)> = Vec::new();
2023 let bytes = s.as_bytes();
2024 let mut i = 0usize;
2025 while i < bytes.len() {
2026 match bytes[i] {
2027 b' ' | b'\t' | b'\n' | b'\r' => {
2028 i += 1;
2029 }
2030 _ => {
2031 let key_start = i;
2032 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
2033 i += 1;
2034 }
2035 let key = s[key_start..i].to_string();
2036 let value = if i < bytes.len() && bytes[i] == b'=' {
2037 i += 1;
2038 if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'') {
2039 let quote = bytes[i];
2040 i += 1;
2041 let v_start = i;
2042 while i < bytes.len() && bytes[i] != quote {
2043 i += 1;
2044 }
2045 let v = s[v_start..i].to_string();
2046 if i < bytes.len() {
2047 i += 1;
2048 }
2049 v
2050 } else {
2051 let v_start = i;
2052 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
2053 i += 1;
2054 }
2055 s[v_start..i].to_string()
2056 }
2057 } else {
2058 String::new()
2059 };
2060 if key.is_empty() {
2061 continue;
2062 }
2063 match key.as_str() {
2064 "class" => {
2065 for c in value.split_ascii_whitespace() {
2066 classes.push(c.to_string());
2067 }
2068 }
2069 "id" => id = value,
2070 _ => kvs.push((key, value)),
2071 }
2072 }
2073 }
2074 }
2075 Attr { id, classes, kvs }
2076}
2077
2078fn definition_list(node: &SyntaxNode) -> Block {
2079 let items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = node
2080 .children()
2081 .filter(|c| c.kind() == SyntaxKind::DEFINITION_ITEM)
2082 .map(|item| {
2083 let term = item
2084 .children()
2085 .find(|c| c.kind() == SyntaxKind::TERM)
2086 .map(|t| coalesce_inlines(inlines_from(&t)))
2087 .unwrap_or_default();
2088 let loose = is_loose_definition_item(&item);
2089 let defs: Vec<Vec<Block>> = item
2090 .children()
2091 .filter(|c| c.kind() == SyntaxKind::DEFINITION)
2092 .map(|d| definition_blocks(&d, loose))
2093 .collect();
2094 (term, defs)
2095 })
2096 .collect();
2097 Block::DefinitionList(items)
2098}
2099
2100fn is_loose_definition_item(item: &SyntaxNode) -> bool {
2106 let mut saw_term = false;
2107 for child in item.children_with_tokens() {
2108 if let NodeOrToken::Node(n) = child {
2109 match n.kind() {
2110 SyntaxKind::TERM => {
2111 saw_term = true;
2112 }
2113 SyntaxKind::BLANK_LINE if saw_term => {
2114 return true;
2115 }
2116 SyntaxKind::DEFINITION => {
2117 return false;
2118 }
2119 _ => {}
2120 }
2121 }
2122 }
2123 false
2124}
2125
2126fn definition_blocks(def_node: &SyntaxNode, loose: bool) -> Vec<Block> {
2127 let extra = definition_content_offset(def_node);
2132 let mut out = Vec::new();
2133 for child in def_node.children() {
2134 match child.kind() {
2135 SyntaxKind::PLAIN => {
2136 let inlines = coalesce_inlines(inlines_from(&child));
2137 if loose {
2138 out.push(Block::Para(inlines));
2139 } else {
2140 out.push(Block::Plain(inlines));
2141 }
2142 }
2143 SyntaxKind::PARAGRAPH => {
2144 out.push(Block::Para(coalesce_inlines(inlines_from(&child))));
2145 }
2146 SyntaxKind::CODE_BLOCK if extra > 0 => {
2147 out.push(indented_code_block_with_extra_strip(&child, extra));
2148 }
2149 _ => collect_block(&child, &mut out),
2150 }
2151 }
2152 out
2153}
2154
2155fn definition_content_offset(def_node: &SyntaxNode) -> usize {
2160 let mut col = 0usize;
2161 let mut saw_marker = false;
2162 for el in def_node.children_with_tokens() {
2163 if let NodeOrToken::Token(t) = el {
2164 match t.kind() {
2165 SyntaxKind::DEFINITION_MARKER => {
2166 col = advance_col(col, t.text());
2167 saw_marker = true;
2168 }
2169 SyntaxKind::WHITESPACE if saw_marker => {
2170 return advance_col(col, t.text());
2171 }
2172 _ if saw_marker => return col,
2173 _ => {}
2174 }
2175 } else if saw_marker {
2176 return col;
2177 }
2178 }
2179 col
2180}
2181
/// Advance a column counter over `s`, starting at `start`.
///
/// A tab moves to the next multiple of four; every other character advances
/// by one.
fn advance_col(start: usize, s: &str) -> usize {
    s.chars().fold(start, |col, ch| {
        if ch == '\t' {
            (col / 4 + 1) * 4
        } else {
            col + 1
        }
    })
}
2195
2196fn line_block(node: &SyntaxNode) -> Block {
2197 let lines: Vec<Vec<Inline>> = node
2198 .children()
2199 .filter(|c| c.kind() == SyntaxKind::LINE_BLOCK_LINE)
2200 .map(|line| {
2201 let mut out = Vec::new();
2202 for el in line.children_with_tokens() {
2203 match el {
2204 NodeOrToken::Token(t) => match t.kind() {
2205 SyntaxKind::LINE_BLOCK_MARKER | SyntaxKind::NEWLINE => {}
2206 _ => push_token_inline(&t, &mut out),
2207 },
2208 NodeOrToken::Node(n) => out.push(inline_from_node(&n)),
2209 }
2210 }
2211 coalesce_inlines(out)
2212 })
2213 .collect();
2214 Block::LineBlock(lines)
2215}
2216
2217fn latex_command_inline(node: &SyntaxNode) -> Inline {
2218 let content = node.text().to_string();
2219 Inline::RawInline("tex".to_string(), content)
2220}
2221
2222fn bracketed_span_inline(node: &SyntaxNode) -> Inline {
2223 let is_html = node
2224 .children_with_tokens()
2225 .any(|el| matches!(&el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::SPAN_BRACKET_OPEN && t.text().starts_with('<')));
2226 let attr_text = node.children_with_tokens().find_map(|el| match el {
2227 NodeOrToken::Token(t) if t.kind() == SyntaxKind::SPAN_ATTRIBUTES => {
2228 Some(t.text().to_string())
2229 }
2230 NodeOrToken::Node(n) if n.kind() == SyntaxKind::SPAN_ATTRIBUTES => {
2231 Some(n.text().to_string())
2232 }
2233 _ => None,
2234 });
2235 let attr = attr_text
2236 .map(|raw| {
2237 let trimmed = raw.trim();
2238 if is_html {
2239 parse_html_attrs(trimmed)
2240 } else if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}'))
2241 {
2242 parse_attr_block(inner)
2243 } else {
2244 Attr::default()
2245 }
2246 })
2247 .unwrap_or_default();
2248 let content = node
2249 .children()
2250 .find(|c| c.kind() == SyntaxKind::SPAN_CONTENT)
2251 .map(|n| coalesce_inlines(inlines_from(&n)))
2252 .unwrap_or_default();
2253 Inline::Span(attr, content)
2254}
2255
2256fn inline_html_span_inline(node: &SyntaxNode) -> Inline {
2257 let attr_text = node
2258 .children()
2259 .find(|c| c.kind() == SyntaxKind::HTML_ATTRS)
2260 .map(|n| n.text().to_string());
2261 let attr = attr_text
2262 .map(|raw| parse_html_attrs(raw.trim()))
2263 .unwrap_or_default();
2264 let content = node
2265 .children()
2266 .find(|c| c.kind() == SyntaxKind::SPAN_CONTENT)
2267 .map(|n| coalesce_inlines(inlines_from(&n)))
2268 .unwrap_or_default();
2269 Inline::Span(attr, content)
2270}
2271
2272fn pipe_table(node: &SyntaxNode) -> Option<TableData> {
2273 let mut header_cells: Vec<Vec<Inline>> = Vec::new();
2274 let mut body_rows: Vec<Vec<Vec<Inline>>> = Vec::new();
2275 let mut aligns: Vec<&'static str> = Vec::new();
2276 let mut caption_inlines: Vec<Inline> = Vec::new();
2277 let mut caption_attr_from_node: Option<Attr> = None;
2278 for child in node.children() {
2279 match child.kind() {
2280 SyntaxKind::TABLE_HEADER => {
2281 header_cells = pipe_table_cells(&child);
2282 }
2283 SyntaxKind::TABLE_SEPARATOR => {
2284 let raw = child.text().to_string();
2285 aligns = pipe_separator_aligns(&raw);
2286 }
2287 SyntaxKind::TABLE_ROW => {
2288 body_rows.push(pipe_table_cells(&child));
2289 }
2290 SyntaxKind::TABLE_CAPTION => {
2291 let (inlines, attr) = pipe_table_caption(&child);
2292 caption_inlines = inlines;
2293 caption_attr_from_node = attr;
2294 }
2295 _ => {}
2296 }
2297 }
2298 let cols = header_cells
2299 .len()
2300 .max(body_rows.iter().map(Vec::len).max().unwrap_or(0))
2301 .max(aligns.len());
2302 if cols == 0 {
2303 return None;
2304 }
2305 while aligns.len() < cols {
2306 aligns.push("AlignDefault");
2307 }
2308 let head_rows = if header_cells.is_empty() {
2309 Vec::new()
2310 } else {
2311 vec![cells_to_plain_blocks(header_cells, cols)]
2312 };
2313 let body_rows: Vec<Vec<GridCell>> = body_rows
2314 .into_iter()
2315 .map(|cells| cells_to_plain_blocks(cells, cols))
2316 .collect();
2317 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
2318 Some(TableData {
2319 attr,
2320 caption: caption_inlines,
2321 aligns,
2322 widths: vec![None; cols],
2323 head_rows,
2324 body_rows,
2325 foot_rows: Vec::new(),
2326 })
2327}
2328
2329fn pipe_table_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
2330 row.children()
2331 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2332 .map(|cell| coalesce_inlines(inlines_from(&cell)))
2333 .collect()
2334}
2335
2336fn extract_caption_attrs(mut inlines: Vec<Inline>) -> (Attr, Vec<Inline>) {
2344 let last_str_end = inlines
2345 .iter()
2346 .rposition(|i| matches!(i, Inline::Str(s) if s.ends_with('}')));
2347 let Some(end_idx) = last_str_end else {
2348 return (Attr::default(), inlines);
2349 };
2350 let mut start_idx = end_idx;
2354 let mut found_open = false;
2355 loop {
2356 match &inlines[start_idx] {
2357 Inline::Str(s) => {
2358 if s.starts_with('{') {
2359 found_open = true;
2360 break;
2361 }
2362 }
2363 Inline::Space => {}
2364 _ => return (Attr::default(), inlines),
2365 }
2366 if start_idx == 0 {
2367 break;
2368 }
2369 start_idx -= 1;
2370 }
2371 if !found_open {
2372 return (Attr::default(), inlines);
2373 }
2374 let mut raw = String::new();
2377 for el in &inlines[start_idx..=end_idx] {
2378 match el {
2379 Inline::Str(s) => raw.push_str(s),
2380 Inline::Space => raw.push(' '),
2381 _ => return (Attr::default(), inlines),
2382 }
2383 }
2384 if !(raw.starts_with('{') && raw.ends_with('}')) {
2385 return (Attr::default(), inlines);
2386 }
2387 let inner = &raw[1..raw.len() - 1];
2388 let attr = parse_attr_block(inner);
2389 inlines.truncate(start_idx);
2390 if matches!(inlines.last(), Some(Inline::Space)) {
2391 inlines.pop();
2392 }
2393 (attr, inlines)
2394}
2395
2396fn resolve_caption_attr(
2401 caption_inlines: Vec<Inline>,
2402 caption_attr_from_node: Option<Attr>,
2403) -> (Attr, Vec<Inline>) {
2404 match caption_attr_from_node {
2405 Some(attr) => (attr, caption_inlines),
2406 None => extract_caption_attrs(caption_inlines),
2407 }
2408}
2409
2410fn project_table_caption_from(node: &SyntaxNode) -> (Vec<Inline>, Option<Attr>) {
2414 node.children()
2415 .find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
2416 .map(|n| pipe_table_caption(&n))
2417 .unwrap_or_else(|| (Vec::new(), None))
2418}
2419
2420fn pipe_table_caption(node: &SyntaxNode) -> (Vec<Inline>, Option<Attr>) {
2421 let mut out = Vec::new();
2426 let mut caption_attr: Option<Attr> = None;
2427 let mut after_prefix = false;
2428 for el in node.children_with_tokens() {
2429 match el {
2430 NodeOrToken::Node(n) => {
2431 if n.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
2432 after_prefix = true;
2433 continue;
2434 }
2435 if !after_prefix {
2436 continue;
2437 }
2438 if n.kind() == SyntaxKind::ATTRIBUTE {
2439 let raw = n.text().to_string();
2440 let inner = raw.trim().trim_start_matches('{').trim_end_matches('}');
2441 caption_attr = Some(parse_attr_block(inner));
2442 if matches!(out.last(), Some(Inline::Space)) {
2444 out.pop();
2445 }
2446 continue;
2447 }
2448 out.push(inline_from_node(&n));
2449 }
2450 NodeOrToken::Token(t) => {
2451 if t.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
2452 after_prefix = true;
2453 continue;
2454 }
2455 if !after_prefix {
2456 continue;
2457 }
2458 if t.kind() == SyntaxKind::ATTRIBUTE {
2459 let raw = t.text();
2460 let inner = raw.trim().trim_start_matches('{').trim_end_matches('}');
2461 caption_attr = Some(parse_attr_block(inner));
2462 if matches!(out.last(), Some(Inline::Space)) {
2463 out.pop();
2464 }
2465 continue;
2466 }
2467 push_token_inline(&t, &mut out);
2468 }
2469 }
2470 }
2471 (coalesce_inlines(out), caption_attr)
2472}
2473
/// Derive column alignments from a pipe-table separator line.
///
/// The line is trimmed, outer pipes are removed, and each `|`-separated
/// segment is classified by its colons: both ends `AlignCenter`, leading
/// only `AlignLeft`, trailing only `AlignRight`, none `AlignDefault`.
fn pipe_separator_aligns(raw: &str) -> Vec<&'static str> {
    let inner = raw
        .trim()
        .trim_start_matches('|')
        .trim_end_matches('|');
    let mut aligns = Vec::new();
    for segment in inner.split('|') {
        let marks = segment.trim();
        let align = if marks.starts_with(':') {
            if marks.ends_with(':') {
                "AlignCenter"
            } else {
                "AlignLeft"
            }
        } else if marks.ends_with(':') {
            "AlignRight"
        } else {
            "AlignDefault"
        };
        aligns.push(align);
    }
    aligns
}
2496
2497fn cells_to_plain_blocks(cells: Vec<Vec<Inline>>, cols: usize) -> Vec<GridCell> {
2498 let mut out: Vec<GridCell> = cells
2499 .into_iter()
2500 .map(|inlines| {
2501 let blocks = if inlines.is_empty() {
2502 Vec::new()
2503 } else {
2504 vec![Block::Plain(inlines)]
2505 };
2506 GridCell::no_span(blocks)
2507 })
2508 .collect();
2509 while out.len() < cols {
2510 out.push(GridCell::no_span(Vec::new()));
2511 }
2512 out
2513}
2514
/// Render a float with a mandatory fractional part.
///
/// Zero is `0.0`. Magnitudes in `[0.1, 1e7)` use plain decimal notation with
/// `.0` appended when the text has neither `.` nor `e`. Everything else uses
/// scientific notation, with `.0` inserted into a mantissa that has no `.`
/// (for example `1.0e7`, `5.0e-2`).
fn show_double(x: f64) -> String {
    if x == 0.0 {
        return "0.0".to_string();
    }
    let magnitude = x.abs();
    if (0.1..1e7).contains(&magnitude) {
        let plain = x.to_string();
        let has_fraction = plain.contains('.') || plain.contains('e');
        return if has_fraction { plain } else { plain + ".0" };
    }
    let sci = format!("{x:e}");
    if let Some((mantissa, exponent)) = sci.split_once('e') {
        if !mantissa.contains('.') {
            return format!("{mantissa}.0e{exponent}");
        }
    }
    sci
}
2545
2546fn simple_table(node: &SyntaxNode) -> Option<TableData> {
2563 let separator = node
2564 .children()
2565 .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)?;
2566 let cols = simple_table_dash_runs(&separator);
2567 if cols.is_empty() {
2568 return None;
2569 }
2570 let header = node
2571 .children()
2572 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
2573 let mut body_rows_nodes: Vec<SyntaxNode> = node
2577 .children()
2578 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
2579 .collect();
2580 if header.is_none()
2581 && body_rows_nodes
2582 .last()
2583 .map(simple_table_row_is_all_dashes)
2584 .unwrap_or(false)
2585 {
2586 body_rows_nodes.pop();
2587 }
2588 let aligns = if let Some(h) = &header {
2590 simple_table_aligns(h, &cols)
2591 } else if let Some(r0) = body_rows_nodes.first() {
2592 simple_table_aligns(r0, &cols)
2593 } else {
2594 vec!["AlignDefault"; cols.len()]
2595 };
2596 let head_rows = match &header {
2597 Some(h) => {
2598 let cells: Vec<Vec<Inline>> = simple_table_row_cells(h);
2599 vec![cells_to_plain_blocks(cells, cols.len())]
2600 }
2601 None => Vec::new(),
2602 };
2603 let body_rows: Vec<Vec<GridCell>> = body_rows_nodes
2604 .iter()
2605 .map(|r| cells_to_plain_blocks(simple_table_row_cells(r), cols.len()))
2606 .collect();
2607 let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
2608 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
2609 Some(TableData {
2610 attr,
2611 caption: caption_inlines,
2612 aligns,
2613 widths: vec![None; cols.len()],
2614 head_rows,
2615 body_rows,
2616 foot_rows: Vec::new(),
2617 })
2618}
2619
2620fn simple_table_dash_runs(separator: &SyntaxNode) -> Vec<(usize, usize)> {
2624 let raw = separator.text().to_string();
2625 let line = raw.trim_end_matches(['\n', '\r']);
2626 let mut runs = Vec::new();
2627 let mut start: Option<usize> = None;
2628 for (i, ch) in line.char_indices() {
2629 if ch == '-' {
2630 if start.is_none() {
2631 start = Some(i);
2632 }
2633 } else if let Some(s) = start.take() {
2634 runs.push((s, i - 1));
2635 }
2636 }
2637 if let Some(s) = start.take() {
2638 runs.push((s, line.len() - 1));
2639 }
2640 runs
2641}
2642
2643fn simple_table_row_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
2644 row.children()
2649 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2650 .map(|cell| coalesce_inlines(inlines_from(&cell)))
2651 .collect()
2652}
2653
2654fn simple_table_row_is_all_dashes(row: &SyntaxNode) -> bool {
2655 let mut had_cell = false;
2656 for cell in row
2657 .children()
2658 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2659 {
2660 let text = cell.text().to_string();
2661 let trimmed = text.trim();
2662 if trimmed.is_empty() {
2663 continue;
2664 }
2665 had_cell = true;
2666 if !trimmed.chars().all(|c| c == '-') {
2667 return false;
2668 }
2669 }
2670 had_cell
2671}
2672
2673fn simple_table_aligns(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<&'static str> {
2681 let row_start: u32 = row.text_range().start().into();
2682 let mut cell_ranges: Vec<(usize, usize)> = Vec::new();
2683 for cell in row
2684 .children()
2685 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
2686 {
2687 if cell.text_range().is_empty() {
2688 continue;
2689 }
2690 let text = cell.text().to_string();
2691 let lstrip = text.chars().take_while(|c| *c == ' ' || *c == '\t').count();
2692 let rstrip = text
2693 .chars()
2694 .rev()
2695 .take_while(|c| *c == ' ' || *c == '\t')
2696 .count();
2697 let trimmed_len = text.chars().count().saturating_sub(lstrip + rstrip);
2698 if trimmed_len == 0 {
2699 continue;
2700 }
2701 let start: u32 = cell.text_range().start().into();
2702 let s = (start - row_start) as usize;
2703 let visible_start = s + lstrip;
2704 let visible_end = visible_start + trimmed_len - 1;
2705 cell_ranges.push((visible_start, visible_end));
2706 }
2707 cols.iter()
2708 .map(|(col_start, col_end)| {
2709 let cell = cell_ranges
2710 .iter()
2711 .find(|(cs, ce)| ce >= col_start && cs <= col_end);
2712 match cell {
2713 Some((cs, ce)) => {
2714 let left_flush = cs == col_start;
2715 let right_flush = ce == col_end;
2716 match (left_flush, right_flush) {
2717 (true, true) => "AlignDefault",
2718 (true, false) => "AlignLeft",
2719 (false, true) => "AlignRight",
2720 (false, false) => "AlignCenter",
2721 }
2722 }
2723 None => "AlignDefault",
2724 }
2725 })
2726 .collect()
2727}
2728
2729#[allow(clippy::needless_range_loop)]
/// Projects a grid-table node into a [`TableData`] by treating the table's
/// text as a rectangular character grid and tracing cell borders in it.
///
/// Returns `None` when the node has no non-caption text, when fewer than two
/// `+` column positions or fewer than two separator lines are found, or when
/// the node has no `TABLE_SEPARATOR` child.
fn grid_table(node: &SyntaxNode) -> Option<TableData> {
    // One entry per physical line of every non-caption child, tagged with the
    // kind of the child it came from so rows can be classified later.
    // NOTE(review): only '\n' is stripped here, whereas `grid_dash_widths` and
    // `grid_separator_aligns` also strip '\r' — confirm CRLF input cannot
    // reach this function, otherwise separator detection below will fail.
    let mut tagged: Vec<(SyntaxKind, String)> = Vec::new();
    for child in node.children() {
        if child.kind() == SyntaxKind::TABLE_CAPTION {
            continue;
        }
        let text = child.text().to_string();
        for line in text.split_inclusive('\n') {
            let trimmed = line.trim_end_matches('\n');
            tagged.push((child.kind(), trimmed.to_string()));
        }
    }
    if tagged.is_empty() {
        return None;
    }

    // Pad every line with spaces to the widest line so `grid[line][column]`
    // is always in bounds for any column that exists on some line.
    let max_width = tagged
        .iter()
        .map(|(_, l)| l.chars().count())
        .max()
        .unwrap_or(0);
    let grid: Vec<Vec<char>> = tagged
        .iter()
        .map(|(_, l)| {
            let mut chars: Vec<char> = l.chars().collect();
            chars.resize(max_width, ' ');
            chars
        })
        .collect();
    let nlines = grid.len();

    // A separator line contains at least one '+' and nothing but border
    // characters and spaces.
    let is_sep_line: Vec<bool> = grid
        .iter()
        .map(|row| {
            row.contains(&'+')
                && row
                    .iter()
                    .all(|&c| matches!(c, '+' | '-' | '=' | ':' | '|' | ' '))
        })
        .collect();

    // Column boundaries: the sorted union of '+' positions over all separator
    // lines.
    let mut col_set: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
    for (i, row) in grid.iter().enumerate() {
        if !is_sep_line[i] {
            continue;
        }
        for (j, &c) in row.iter().enumerate() {
            if c == '+' {
                col_set.insert(j);
            }
        }
    }
    let cols_pos: Vec<usize> = col_set.into_iter().collect();
    if cols_pos.len() < 2 {
        return None;
    }
    let ncols = cols_pos.len() - 1;

    // Row boundaries: the line indices of all separator lines.
    let row_seps: Vec<usize> = (0..nlines).filter(|&i| is_sep_line[i]).collect();
    if row_seps.len() < 2 {
        return None;
    }
    let nrows = row_seps.len() - 1;

    // Classify each logical row from the kinds of the lines strictly between
    // its two separators; rows default to "body" and the last header/footer
    // line seen wins.
    let mut block_kind: Vec<&'static str> = vec!["body"; nrows];
    for r in 0..nrows {
        let start = row_seps[r];
        let end = row_seps[r + 1];
        for i in (start + 1)..end {
            match tagged[i].0 {
                SyntaxKind::TABLE_HEADER => block_kind[r] = "head",
                SyntaxKind::TABLE_FOOTER => block_kind[r] = "foot",
                _ => {}
            }
        }
    }

    // `occupied[r][c]` marks grid slots already covered by a discovered cell,
    // so spanned slots are not scanned again.
    let mut occupied = vec![vec![false; ncols]; nrows];
    // Discovered cells as (start row, start column, row span, column span, text).
    let mut cells: Vec<(usize, usize, u32, u32, String)> = Vec::new();
    for sr in 0..nrows {
        for sc in 0..ncols {
            if occupied[sr][sc] {
                continue;
            }
            let i = row_seps[sr];
            let j = cols_pos[sc];
            // A cell is only looked for where a '+' corner is present.
            if grid[i][j] != '+' {
                continue;
            }
            let Some((er, ec, content)) = find_grid_cell(&grid, i, j, sr, sc, &cols_pos, &row_seps)
            else {
                continue;
            };
            let row_span = (er - sr) as u32;
            let col_span = (ec - sc) as u32;
            for r in sr..er {
                for c in sc..ec {
                    occupied[r][c] = true;
                }
            }
            cells.push((sr, sc, row_span, col_span, content));
        }
    }

    // Group cells by the row they start in (ordered by start column) and route
    // each row to head/body/foot according to `block_kind`.
    let mut head_rows: Vec<Vec<GridCell>> = Vec::new();
    let mut body_rows: Vec<Vec<GridCell>> = Vec::new();
    let mut foot_rows: Vec<Vec<GridCell>> = Vec::new();
    for r in 0..nrows {
        let mut row_cells: Vec<&(usize, usize, u32, u32, String)> =
            cells.iter().filter(|(sr, _, _, _, _)| *sr == r).collect();
        row_cells.sort_by_key(|(_, sc, _, _, _)| *sc);
        let row: Vec<GridCell> = row_cells
            .into_iter()
            .map(|(_, _, rs, cs, text)| {
                let blocks = parse_grid_cell_text(text);
                GridCell {
                    row_span: *rs,
                    col_span: *cs,
                    blocks,
                }
            })
            .collect();
        match block_kind[r] {
            "head" => head_rows.push(row),
            "foot" => foot_rows.push(row),
            _ => body_rows.push(row),
        }
    }

    // Alignment and widths come from the first separator that contains a ':',
    // falling back to the first separator of any kind; `?` bails out when the
    // node has no separator child at all.
    let alignment_sep = node
        .children()
        .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
        .find(|c| c.text().to_string().contains(':'))
        .or_else(|| {
            node.children()
                .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
        })?;
    let widths = grid_dash_widths(&alignment_sep);
    let aligns_raw = alignment_sep.text().to_string();
    let aligns = if aligns_raw.contains(':') {
        grid_separator_aligns(&aligns_raw, ncols)
    } else {
        vec!["AlignDefault"; ncols]
    };

    let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
    let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);

    Some(TableData {
        attr,
        caption: caption_inlines,
        aligns,
        widths: widths.into_iter().map(Some).collect(),
        head_rows,
        body_rows,
        foot_rows,
    })
}
2934
/// Searches the character grid for the cell whose top-left corner is the `+`
/// at `grid[i][j]` (logical row `sr`, logical column `sc`).
///
/// Candidate right edges are tried from the nearest column boundary outwards,
/// and for each of them candidate bottom edges from the nearest separator
/// line downwards; the first rectangle with intact borders is returned.
///
/// Returns `(er, ec, content)` where `er`/`ec` are the logical row/column
/// indices of the bottom/right border (so the spans are `er - sr` and
/// `ec - sc`) and `content` is the cell text with leading and trailing empty
/// lines removed. Returns `None` when no rectangle closes.
///
/// `row_seps` and `cols_pos` must be non-empty (`len() - 1` is computed
/// unchecked); the caller in this file only calls with at least two entries
/// in each.
#[allow(clippy::needless_range_loop)]
fn find_grid_cell(
    grid: &[Vec<char>],
    i: usize,
    j: usize,
    sr: usize,
    sc: usize,
    cols_pos: &[usize],
    row_seps: &[usize],
) -> Option<(usize, usize, String)> {
    let nrows = row_seps.len() - 1;
    let ncols = cols_pos.len() - 1;

    for ec in (sc + 1)..=ncols {
        let k = cols_pos[ec];
        // The top border must be unbroken between the two corners; once it is
        // broken, every wider candidate is broken too, hence `break`.
        let top_ok = (j + 1..k).all(|c| matches!(grid[i][c], '-' | '=' | ':' | '+'));
        if !top_ok {
            break;
        }
        for er in (sr + 1)..=nrows {
            let l = row_seps[er];
            // A broken left border cannot be repaired by a taller candidate.
            let left_ok = (i + 1..l).all(|r| matches!(grid[r][j], '|' | '+'));
            if !left_ok {
                break;
            }
            // The remaining checks only reject this candidate; the next
            // bottom edge is still tried.
            let right_ok = (i + 1..l).all(|r| matches!(grid[r][k], '|' | '+'));
            if !right_ok {
                continue;
            }
            let bot_ok = (j + 1..k).all(|c| matches!(grid[l][c], '-' | '=' | ':' | '+'));
            if !bot_ok {
                continue;
            }
            if grid[l][j] != '+' || grid[l][k] != '+' {
                continue;
            }
            // Reject a rectangle that has a full horizontal border strictly
            // inside it (corners on both edges and border characters all the
            // way between them).
            let interior_split = (i + 1..l).any(|m| {
                grid[m][j] == '+'
                    && grid[m][k] == '+'
                    && (j + 1..k).all(|c| matches!(grid[m][c], '-' | '=' | ':' | '+'))
            });
            if interior_split {
                continue;
            }

            // Collect the text strictly inside the borders: drop at most one
            // leading space per line and all trailing whitespace.
            let mut content_lines: Vec<String> = Vec::new();
            for r in (i + 1)..l {
                let slice: String = grid[r][j + 1..k].iter().collect();
                let stripped = slice.strip_prefix(' ').unwrap_or(&slice).to_string();
                content_lines.push(stripped.trim_end().to_string());
            }
            // Trim empty lines from both ends but keep interior ones.
            let first = content_lines.iter().position(|s| !s.is_empty());
            let last = content_lines.iter().rposition(|s| !s.is_empty());
            let content = match (first, last) {
                (Some(f), Some(l)) => content_lines[f..=l].join("\n"),
                _ => String::new(),
            };
            return Some((er, ec, content));
        }
    }
    None
}
3020
3021fn parse_grid_cell_text(text: &str) -> Vec<Block> {
3025 if text.trim().is_empty() {
3026 return Vec::new();
3027 }
3028 let opts = crate::ParserOptions {
3029 flavor: crate::Flavor::Pandoc,
3030 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
3031 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
3032 ..crate::ParserOptions::default()
3033 };
3034 let doc = crate::parse(text, Some(opts));
3035 let mut out = Vec::new();
3036 for child in doc.children() {
3037 if let Some(block) = block_from(&child) {
3038 let block = match block {
3039 Block::Para(inlines) => Block::Plain(inlines),
3040 other => other,
3041 };
3042 out.push(block);
3043 }
3044 }
3045 out
3046}
3047
3048fn grid_dash_widths(separator: &SyntaxNode) -> Vec<f64> {
3059 let raw_text = separator.text().to_string();
3060 let line = raw_text.trim_end_matches(['\n', '\r']);
3061 let mut raw: Vec<usize> = Vec::new();
3062 let mut count: usize = 0;
3063 let mut in_col = false;
3064 for ch in line.chars() {
3065 match ch {
3066 '+' => {
3067 if in_col {
3068 raw.push(count + 1);
3069 count = 0;
3070 }
3071 in_col = true;
3072 }
3073 _ => {
3074 if in_col {
3075 count += 1;
3076 }
3077 }
3078 }
3079 }
3080 if raw.is_empty() {
3081 return Vec::new();
3082 }
3083 let total: usize = raw.iter().sum();
3084 let count = raw.len();
3085 let norm = (total + count).saturating_sub(2).max(72) as f64;
3086 raw.into_iter().map(|w| w as f64 / norm).collect()
3087}
3088
3089fn grid_separator_aligns(raw: &str, cols: usize) -> Vec<&'static str> {
3090 let line = raw.trim_end_matches(['\n', '\r']);
3091 let mut aligns: Vec<&'static str> = Vec::with_capacity(cols);
3092 let mut col_start: Option<usize> = None;
3093 for (i, ch) in line.char_indices() {
3094 if ch == '+' {
3095 if let Some(s) = col_start.take() {
3096 let seg = &line[s..i];
3097 aligns.push(grid_segment_align(seg));
3098 }
3099 col_start = Some(i + 1);
3100 }
3101 }
3102 while aligns.len() < cols {
3103 aligns.push("AlignDefault");
3104 }
3105 aligns.truncate(cols);
3106 aligns
3107}
3108
/// Maps the colon markers at the ends of one separator segment to a Pandoc
/// alignment name: both ends → center, leading only → left, trailing only →
/// right, none → default.
fn grid_segment_align(seg: &str) -> &'static str {
    let colon_first = seg.starts_with(':');
    let colon_last = seg.ends_with(':');
    if colon_first && colon_last {
        "AlignCenter"
    } else if colon_first {
        "AlignLeft"
    } else if colon_last {
        "AlignRight"
    } else {
        "AlignDefault"
    }
}
3120
3121fn multiline_table(node: &SyntaxNode) -> Option<TableData> {
3131 let separators: Vec<SyntaxNode> = node
3134 .children()
3135 .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
3136 .collect();
3137 let header = node
3138 .children()
3139 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
3140 let column_sep = if header.is_some() {
3141 separators.get(1).cloned()
3142 } else {
3143 separators.first().cloned()
3144 }?;
3145 let cols = simple_table_dash_runs(&column_sep);
3146 if cols.is_empty() {
3147 return None;
3148 }
3149 let raw: Vec<usize> = cols
3154 .iter()
3155 .enumerate()
3156 .map(|(i, (s, e))| {
3157 if i + 1 < cols.len() {
3158 cols[i + 1].0 - s
3159 } else {
3160 e - s + 2
3161 }
3162 })
3163 .collect();
3164 let total: usize = raw.iter().sum();
3165 let norm = (total.max(72)) as f64;
3166 let widths: Vec<f64> = raw.into_iter().map(|w| w as f64 / norm).collect();
3167 let aligns = if let Some(h) = &header {
3170 simple_table_aligns(h, &cols)
3171 } else if let Some(r0) = node.children().find(|c| c.kind() == SyntaxKind::TABLE_ROW) {
3172 simple_table_aligns(&r0, &cols)
3173 } else {
3174 vec!["AlignDefault"; cols.len()]
3175 };
3176 let head_rows = match &header {
3177 Some(h) => vec![
3178 multiline_row_cells_blocks(h, &cols)
3179 .into_iter()
3180 .map(GridCell::no_span)
3181 .collect(),
3182 ],
3183 None => Vec::new(),
3184 };
3185 let body_rows: Vec<Vec<GridCell>> = node
3186 .children()
3187 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
3188 .map(|r| {
3189 multiline_row_cells_blocks(&r, &cols)
3190 .into_iter()
3191 .map(GridCell::no_span)
3192 .collect()
3193 })
3194 .collect();
3195 let (caption_inlines, caption_attr_from_node) = project_table_caption_from(node);
3196 let (attr, caption_inlines) = resolve_caption_attr(caption_inlines, caption_attr_from_node);
3197 Some(TableData {
3198 attr,
3199 caption: caption_inlines,
3200 aligns,
3201 widths: widths.into_iter().map(Some).collect(),
3202 head_rows,
3203 body_rows,
3204 foot_rows: Vec::new(),
3205 })
3206}
3207
3208fn multiline_row_cells_blocks(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<Vec<Block>> {
3212 let row_start: u32 = row.text_range().start().into();
3213 let raw = row.text().to_string();
3214 let lines: Vec<&str> = raw.split_inclusive('\n').collect();
3218 let mut col_lines: Vec<Vec<String>> = vec![Vec::new(); cols.len()];
3219 let mut line_start_offset: usize = 0;
3220 for line in lines {
3221 let line_no_nl = line.trim_end_matches('\n');
3222 if line_no_nl.trim().is_empty() {
3223 line_start_offset += line.len();
3224 continue;
3225 }
3226 for (i, &(cs, ce)) in cols.iter().enumerate() {
3227 let slice = char_slice(line_no_nl, cs, ce + 1);
3229 let trimmed = slice.trim();
3230 if !trimmed.is_empty() {
3231 col_lines[i].push(trimmed.to_string());
3232 }
3233 }
3234 line_start_offset += line.len();
3235 }
3236 let _ = (row_start, line_start_offset);
3237 cols.iter()
3238 .enumerate()
3239 .map(|(i, _)| {
3240 let segments = &col_lines[i];
3241 if segments.is_empty() {
3242 return Vec::new();
3243 }
3244 let joined = segments.join("\n");
3250 let inlines = parse_cell_text_inlines(&joined);
3251 if inlines.is_empty() {
3252 return Vec::new();
3253 }
3254 vec![Block::Plain(coalesce_inlines(inlines))]
3255 })
3256 .collect()
3257}
3258
3259fn parse_cell_text_inlines(text: &str) -> Vec<Inline> {
3265 if text.trim().is_empty() {
3266 return Vec::new();
3267 }
3268 let opts = crate::ParserOptions {
3269 flavor: crate::Flavor::Pandoc,
3270 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
3271 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
3272 ..crate::ParserOptions::default()
3273 };
3274 let doc = crate::parse(text, Some(opts));
3275 for node in doc.descendants() {
3276 if matches!(node.kind(), SyntaxKind::PARAGRAPH | SyntaxKind::PLAIN) {
3277 return inlines_from(&node);
3278 }
3279 }
3280 Vec::new()
3281}
3282
/// Returns the sub-slice of `s` covering characters `start_char..end_char`
/// (character indices, not byte offsets).
///
/// Indices past the end of `s` are clamped to the end; an empty or inverted
/// range yields `""`.
fn char_slice(s: &str, start_char: usize, end_char: usize) -> &str {
    if start_char >= end_char {
        return "";
    }
    // Byte offset of every character, consumed left to right so the string is
    // walked only once.
    let mut boundaries = s.char_indices().map(|(offset, _)| offset);
    let from = boundaries.nth(start_char).unwrap_or(s.len());
    let to = boundaries
        .nth(end_char - start_char - 1)
        .unwrap_or(s.len());
    &s[from..to]
}
3300
3301fn list_block(node: &SyntaxNode) -> Block {
3302 let loose = is_loose_list(node);
3303 let items: Vec<Vec<Block>> = node
3304 .children()
3305 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
3306 .map(|item| list_item_blocks(&item, loose))
3307 .collect();
3308 if list_is_ordered(node) {
3309 let (start, style, delim) = ordered_list_attrs(node);
3310 Block::OrderedList(start, style, delim, items)
3311 } else {
3312 Block::BulletList(items)
3313 }
3314}
3315
3316fn list_is_ordered(node: &SyntaxNode) -> bool {
3317 let Some(item) = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
3318 return false;
3319 };
3320 let marker = item
3321 .children_with_tokens()
3322 .filter_map(|el| el.into_token())
3323 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
3324 .map(|t| t.text().to_string())
3325 .unwrap_or_default();
3326 let trimmed = marker.trim();
3327 !trimmed.starts_with(['-', '+', '*'])
3328}
3329
3330fn ordered_list_attrs(node: &SyntaxNode) -> (usize, &'static str, &'static str) {
3331 let item = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM);
3332 let marker = item
3333 .as_ref()
3334 .and_then(|i| {
3335 i.children_with_tokens()
3336 .filter_map(|el| el.into_token())
3337 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
3338 .map(|t| t.text().to_string())
3339 })
3340 .unwrap_or_default();
3341 let (mut start, style, delim) = classify_ordered_marker(marker.trim());
3342 if style == "Example" {
3343 let offset: u32 = node.text_range().start().into();
3344 if let Some(s) = REFS_CTX.with(|c| {
3345 c.borrow()
3346 .example_list_start_by_offset
3347 .get(&offset)
3348 .copied()
3349 }) {
3350 start = s;
3351 }
3352 }
3353 (start, style, delim)
3354}
3355
3356fn classify_ordered_marker(trimmed: &str) -> (usize, &'static str, &'static str) {
3364 let (body, delim) =
3366 if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
3367 (inner, "TwoParens")
3368 } else if let Some(inner) = trimmed.strip_suffix(')') {
3369 (inner, "OneParen")
3370 } else if let Some(inner) = trimmed.strip_suffix('.') {
3371 (inner, "Period")
3372 } else {
3373 (trimmed, "DefaultDelim")
3374 };
3375
3376 if !body.is_empty() && body.chars().all(|c| c.is_ascii_digit()) {
3378 let start: usize = body.parse().unwrap_or(1);
3379 return (start, "Decimal", delim);
3380 }
3381
3382 if body == "#" {
3385 return (1, "DefaultStyle", "DefaultDelim");
3386 }
3387
3388 if let Some(rest) = body.strip_prefix('@')
3390 && rest
3391 .chars()
3392 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
3393 {
3394 return (1, "Example", delim);
3395 }
3396
3397 if body == "i" {
3399 return (1, "LowerRoman", delim);
3400 }
3401 if body == "I" {
3402 return (1, "UpperRoman", delim);
3403 }
3404
3405 if body.len() == 1
3407 && let Some(c) = body.chars().next()
3408 {
3409 if c.is_ascii_lowercase() {
3410 return ((c as u8 - b'a') as usize + 1, "LowerAlpha", delim);
3411 }
3412 if c.is_ascii_uppercase() {
3413 return ((c as u8 - b'A') as usize + 1, "UpperAlpha", delim);
3414 }
3415 }
3416
3417 if body
3419 .chars()
3420 .all(|c| matches!(c, 'i' | 'v' | 'x' | 'l' | 'c' | 'd' | 'm'))
3421 && let Some(n) = roman_to_int(body, false)
3422 {
3423 return (n, "LowerRoman", delim);
3424 }
3425 if body
3426 .chars()
3427 .all(|c| matches!(c, 'I' | 'V' | 'X' | 'L' | 'C' | 'D' | 'M'))
3428 && let Some(n) = roman_to_int(body, true)
3429 {
3430 return (n, "UpperRoman", delim);
3431 }
3432
3433 (1, "Decimal", delim)
3436}
3437
/// Converts a roman numeral to its integer value.
///
/// With `upper == true` only upper-case digits are accepted; otherwise each
/// character is upper-cased before lookup. Digits are consumed left to
/// right: a digit smaller than its successor is combined with it as a
/// subtractive pair (`successor - digit`) and both are consumed, otherwise
/// the digit's value is added. No further validation of numeral well-formedness
/// is performed. Returns `None` for an empty string or any non-roman
/// character.
fn roman_to_int(s: &str, upper: bool) -> Option<usize> {
    fn digit_value(c: char) -> Option<usize> {
        match c {
            'I' => Some(1),
            'V' => Some(5),
            'X' => Some(10),
            'L' => Some(50),
            'C' => Some(100),
            'D' => Some(500),
            'M' => Some(1000),
            _ => None,
        }
    }

    if s.is_empty() {
        return None;
    }

    let mut digits = s
        .chars()
        .map(|c| if upper { c } else { c.to_ascii_uppercase() })
        .peekable();
    let mut sum = 0usize;
    while let Some(c) = digits.next() {
        let current = digit_value(c)?;
        // An invalid successor counts as 0 here and is rejected when it is
        // reached on the next iteration.
        let following = digits.peek().copied().and_then(digit_value).unwrap_or(0);
        if current < following {
            sum += following - current;
            digits.next();
        } else {
            sum += current;
        }
    }
    Some(sum)
}
3475
3476fn list_item_blocks(item: &SyntaxNode, loose: bool) -> Vec<Block> {
3477 let mut out = Vec::new();
3478 let item_indent = list_item_content_offset(item);
3479 let task_checkbox = task_checkbox_for_item(item);
3480 let mut checkbox_emitted = false;
3481 for child in item.children() {
3482 match child.kind() {
3483 SyntaxKind::PLAIN => {
3484 let mut inlines = coalesce_inlines(inlines_from(&child));
3485 if inlines.is_empty() {
3490 continue;
3491 }
3492 if !checkbox_emitted && let Some(glyph) = task_checkbox {
3493 inlines.insert(0, Inline::Space);
3494 inlines.insert(0, Inline::Str(glyph.to_string()));
3495 checkbox_emitted = true;
3496 }
3497 if loose {
3498 out.push(Block::Para(inlines));
3499 } else {
3500 out.push(Block::Plain(inlines));
3501 }
3502 }
3503 SyntaxKind::CODE_BLOCK => {
3504 out.push(indented_code_block_with_extra_strip(&child, item_indent));
3512 }
3513 _ => collect_block(&child, &mut out),
3514 }
3515 }
3516 out
3517}
3518
3519fn task_checkbox_for_item(item: &SyntaxNode) -> Option<&'static str> {
3524 item.children_with_tokens()
3525 .filter_map(|el| el.into_token())
3526 .find(|t| t.kind() == SyntaxKind::TASK_CHECKBOX)
3527 .map(|t| {
3528 let text = t.text();
3529 if text.contains('x') || text.contains('X') {
3530 "\u{2612}"
3531 } else {
3532 "\u{2610}"
3533 }
3534 })
3535}
3536
3537fn list_item_content_offset(item: &SyntaxNode) -> usize {
3556 let parent_ws = parent_list_leading_ws(item);
3557 let mut marker_width = 0usize;
3558 let mut leading_ws = 0usize;
3559 let mut saw_marker = false;
3560 for el in item.children_with_tokens() {
3561 if let NodeOrToken::Token(t) = el {
3562 match t.kind() {
3563 SyntaxKind::WHITESPACE if !saw_marker => {
3564 leading_ws += t.text().chars().count();
3565 }
3566 SyntaxKind::LIST_MARKER => {
3567 marker_width += t.text().chars().count();
3568 saw_marker = true;
3569 }
3570 SyntaxKind::WHITESPACE if saw_marker => {
3571 return parent_ws + leading_ws + marker_width + t.text().chars().count();
3572 }
3573 _ if saw_marker => {
3574 return parent_ws + leading_ws + marker_width;
3575 }
3576 _ => {}
3577 }
3578 } else if saw_marker {
3579 return parent_ws + leading_ws + marker_width;
3580 }
3581 }
3582 parent_ws + leading_ws + marker_width
3583}
3584
3585fn parent_list_leading_ws(item: &SyntaxNode) -> usize {
3590 let prev = item.prev_sibling_or_token();
3591 match prev {
3592 Some(NodeOrToken::Token(t)) if t.kind() == SyntaxKind::WHITESPACE => {
3593 t.text().chars().count()
3594 }
3595 _ => 0,
3596 }
3597}
3598
3599fn is_loose_list(node: &SyntaxNode) -> bool {
3600 let mut prev_was_item = false;
3601 for child in node.children_with_tokens() {
3602 if let NodeOrToken::Node(n) = child {
3603 if n.kind() == SyntaxKind::LIST_ITEM {
3604 prev_was_item = true;
3605 } else if n.kind() == SyntaxKind::BLANK_LINE
3606 && prev_was_item
3607 && n.next_sibling()
3608 .map(|s| s.kind() == SyntaxKind::LIST_ITEM)
3609 .unwrap_or(false)
3610 {
3611 return true;
3612 }
3613 }
3614 }
3615 for item in node
3616 .children()
3617 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
3618 {
3619 if item.children().any(|c| c.kind() == SyntaxKind::PARAGRAPH) {
3620 return true;
3621 }
3622 if has_internal_blank_between_blocks(&item) {
3627 return true;
3628 }
3629 }
3630 false
3631}
3632
3633fn has_internal_blank_between_blocks(item: &SyntaxNode) -> bool {
3634 let mut saw_block_before = false;
3635 let mut pending_blank = false;
3636 for child in item.children() {
3637 match child.kind() {
3638 SyntaxKind::BLANK_LINE => {
3639 if saw_block_before {
3640 pending_blank = true;
3641 }
3642 }
3643 SyntaxKind::PLAIN if child_is_empty_plain(&child) => {}
3647 _ => {
3648 if pending_blank {
3649 return true;
3650 }
3651 saw_block_before = true;
3652 }
3653 }
3654 }
3655 false
3656}
3657
3658fn child_is_empty_plain(node: &SyntaxNode) -> bool {
3659 !node.children_with_tokens().any(|el| match el {
3660 NodeOrToken::Token(t) => !matches!(t.kind(), SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE),
3661 NodeOrToken::Node(_) => true,
3662 })
3663}
3664
3665fn inlines_from(parent: &SyntaxNode) -> Vec<Inline> {
3668 let mut out = Vec::new();
3669 let mut iter = parent.children_with_tokens().peekable();
3670 while let Some(el) = iter.next() {
3671 match el {
3672 NodeOrToken::Token(t) => push_token_inline(&t, &mut out),
3673 NodeOrToken::Node(n) if n.kind() == SyntaxKind::LATEX_COMMAND => {
3674 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
3675 }
3676 NodeOrToken::Node(n) if n.kind() == SyntaxKind::CITATION => {
3677 emit_citation_with_absorb(&n, &mut iter, &mut out);
3678 }
3679 NodeOrToken::Node(n) => push_inline_node(&n, &mut out),
3680 }
3681 }
3682 while matches!(out.last(), Some(Inline::SoftBreak)) {
3686 out.pop();
3687 }
3688 out
3689}
3690
3691fn emit_citation_with_absorb<I>(
3699 node: &SyntaxNode,
3700 iter: &mut std::iter::Peekable<I>,
3701 out: &mut Vec<Inline>,
3702) where
3703 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
3704{
3705 let bracketed = node
3706 .children_with_tokens()
3707 .filter_map(|el| el.into_token())
3708 .any(|t| t.kind() == SyntaxKind::LINK_START);
3709 if bracketed {
3710 render_citation_inline(node, out, None);
3711 return;
3712 }
3713 let next_sibling_pair = node.next_sibling_or_token().and_then(|el1| {
3717 let t = el1.as_token().cloned()?;
3718 if t.kind() != SyntaxKind::TEXT || !t.text().starts_with(' ') {
3719 return None;
3720 }
3721 let space_text = t.text().to_string();
3722 let link_el = t.next_sibling_or_token()?;
3723 let link = link_el.as_node().cloned()?;
3724 if link.kind() != SyntaxKind::LINK && link.kind() != SyntaxKind::UNRESOLVED_REFERENCE {
3729 return None;
3730 }
3731 let has_dest = link
3732 .children_with_tokens()
3733 .filter_map(|el| el.into_token())
3734 .any(|tok| tok.kind() == SyntaxKind::LINK_DEST_START);
3735 if has_dest {
3736 return None;
3737 }
3738 let link_text = link
3739 .children()
3740 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3741 .map(|tt| tt.text().to_string())
3742 .unwrap_or_default();
3743 Some((space_text, link_text))
3744 });
3745 if let Some((_space_text, locator_text)) = next_sibling_pair {
3746 iter.next();
3748 iter.next();
3749 render_citation_inline(node, out, Some(&locator_text));
3750 } else {
3751 render_citation_inline(node, out, None);
3752 }
3753}
3754
3755fn emit_latex_command_with_absorb<I>(
3762 node: &SyntaxNode,
3763 iter: &mut std::iter::Peekable<I>,
3764 out: &mut Vec<Inline>,
3765) where
3766 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
3767{
3768 let mut content = node.text().to_string();
3769 let ends_in_letter = content
3770 .chars()
3771 .next_back()
3772 .is_some_and(|c| c.is_ascii_alphabetic());
3773 if ends_in_letter
3774 && let Some(NodeOrToken::Token(t)) = iter.peek()
3775 && t.kind() == SyntaxKind::TEXT
3776 {
3777 let text = t.text().to_string();
3778 let bytes = text.as_bytes();
3779 let mut absorbed = 0;
3780 while absorbed < bytes.len() && (bytes[absorbed] == b' ' || bytes[absorbed] == b'\t') {
3781 absorbed += 1;
3782 }
3783 if absorbed > 0 {
3784 content.push_str(&text[..absorbed]);
3785 out.push(Inline::RawInline("tex".to_string(), content));
3786 iter.next();
3787 let remainder = &text[absorbed..];
3788 if !remainder.is_empty() {
3789 push_text(remainder, out);
3790 }
3791 return;
3792 }
3793 }
3794 out.push(Inline::RawInline("tex".to_string(), content));
3795}
3796
3797fn push_inline_node(node: &SyntaxNode, out: &mut Vec<Inline>) {
3798 match node.kind() {
3799 SyntaxKind::LINK => render_link_inline(node, out),
3800 SyntaxKind::IMAGE_LINK => render_image_inline(node, out),
3801 SyntaxKind::CITATION => render_citation_inline(node, out, None),
3802 SyntaxKind::UNRESOLVED_REFERENCE => render_unresolved_reference_inline(node, out),
3810 _ => out.push(inline_from_node(node)),
3811 }
3812}
3813
3814fn render_unresolved_reference_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
3827 let is_image = node
3828 .children()
3829 .any(|c| c.kind() == SyntaxKind::IMAGE_LINK_START);
3830 let text_node = if is_image {
3831 node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT)
3832 } else {
3833 node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3834 };
3835 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
3836
3837 let text_label = text_node
3838 .as_ref()
3839 .map(|n| n.text().to_string())
3840 .unwrap_or_default();
3841 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
3842 Some(rn) => {
3843 let inner = rn.text().to_string();
3844 if inner.is_empty() {
3845 (text_label.clone(), true, String::new())
3846 } else {
3847 (inner.clone(), true, inner)
3848 }
3849 }
3850 None => (text_label.clone(), false, String::new()),
3851 };
3852
3853 if !is_image && let Some(id) = lookup_heading_id(&label) {
3856 let url = format!("#{id}");
3857 let resolved_text_inlines = text_node
3858 .as_ref()
3859 .map(|n| coalesce_inlines(inlines_from(n)))
3860 .unwrap_or_default();
3861 out.push(Inline::Link(
3862 extract_attr_from_node(node),
3863 resolved_text_inlines,
3864 url,
3865 String::new(),
3866 ));
3867 return;
3868 }
3869
3870 if let Some((url, title)) = lookup_ref(&label) {
3876 let resolved_text_inlines = text_node
3877 .as_ref()
3878 .map(|n| coalesce_inlines(inlines_from(n)))
3879 .unwrap_or_default();
3880 let kind = if is_image {
3881 Inline::Image
3882 } else {
3883 Inline::Link
3884 };
3885 out.push(kind(
3886 extract_attr_from_node(node),
3887 resolved_text_inlines,
3888 url,
3889 title,
3890 ));
3891 return;
3892 }
3893
3894 let unresolved_text_inlines = text_node
3897 .as_ref()
3898 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
3899 .unwrap_or_default();
3900 let opener = if is_image { "![" } else { "[" };
3901 out.push(Inline::Str(opener.to_string()));
3902 out.extend(unresolved_text_inlines);
3903 let suffix = if has_second_brackets {
3904 format!("][{second_inner}]")
3905 } else {
3906 "]".to_string()
3907 };
3908 out.push(Inline::Str(suffix));
3909}
3910
3911fn render_citation_inline(
3920 node: &SyntaxNode,
3921 out: &mut Vec<Inline>,
3922 extra_suffix_text: Option<&str>,
3923) {
3924 let first_key = node
3926 .children_with_tokens()
3927 .filter_map(|el| el.into_token())
3928 .find(|t| t.kind() == SyntaxKind::CITATION_KEY)
3929 .map(|t| t.text().to_string())
3930 .unwrap_or_default();
3931 let example_resolution =
3932 REFS_CTX.with(|c| c.borrow().example_label_to_num.get(&first_key).copied());
3933 if let Some(n) = example_resolution {
3934 out.push(Inline::Str(n.to_string()));
3935 return;
3936 }
3937
3938 let bracketed = node
3939 .children_with_tokens()
3940 .filter_map(|el| el.into_token())
3941 .any(|t| t.kind() == SyntaxKind::LINK_START);
3942
3943 let mut builders: Vec<CitationBuilder> = Vec::new();
3944 let mut current: Option<CitationBuilder> = None;
3945 let mut pending_prefix = String::new();
3946 for el in node.children_with_tokens() {
3947 let token = match el {
3948 NodeOrToken::Token(t) => t,
3949 _ => continue,
3950 };
3951 match token.kind() {
3952 SyntaxKind::LINK_START | SyntaxKind::LINK_DEST => {}
3953 SyntaxKind::CITATION_BRACE_OPEN | SyntaxKind::CITATION_BRACE_CLOSE => {}
3954 SyntaxKind::CITATION_MARKER => {
3955 if let Some(c) = current.take() {
3956 builders.push(c);
3957 }
3958 let mode = if token.text() == "-@" {
3959 CitationMode::SuppressAuthor
3960 } else if bracketed {
3961 CitationMode::NormalCitation
3962 } else {
3963 CitationMode::AuthorInText
3964 };
3965 current = Some(CitationBuilder::new(
3966 std::mem::take(&mut pending_prefix),
3967 mode,
3968 ));
3969 }
3970 SyntaxKind::CITATION_KEY => {
3971 if let Some(c) = &mut current {
3972 c.id.push_str(token.text());
3973 }
3974 }
3975 SyntaxKind::CITATION_CONTENT => {
3976 if let Some(c) = &mut current {
3977 c.suffix_raw.push_str(token.text());
3978 } else {
3979 pending_prefix.push_str(token.text());
3980 }
3981 }
3982 SyntaxKind::CITATION_SEPARATOR => {
3983 if let Some(c) = current.take() {
3984 builders.push(c);
3985 }
3986 }
3987 _ => {}
3988 }
3989 }
3990 if let Some(c) = current.take() {
3991 builders.push(c);
3992 }
3993
3994 if let Some(extra) = extra_suffix_text
3998 && let Some(last) = builders.last_mut()
3999 {
4000 if !last.suffix_raw.is_empty() && !extra.starts_with(' ') {
4001 last.suffix_raw.push(' ');
4002 }
4003 last.suffix_raw.push_str(extra);
4004 }
4005
4006 let note_offset: u32 = node.text_range().start().into();
4007 let note_num = REFS_CTX
4008 .with(|c| {
4009 c.borrow()
4010 .cite_note_num_by_offset
4011 .get(¬e_offset)
4012 .copied()
4013 })
4014 .unwrap_or(1);
4015
4016 let projected: Vec<Citation> = builders
4017 .into_iter()
4018 .map(|b| b.into_citation(note_num))
4019 .collect();
4020
4021 let mut literal = node.text().to_string();
4023 if let Some(extra) = extra_suffix_text {
4024 literal.push(' ');
4025 literal.push('[');
4026 literal.push_str(extra);
4027 literal.push(']');
4028 }
4029 let text_inlines = literal_inlines(&literal);
4030
4031 out.push(Inline::Cite(projected, text_inlines));
4032}
4033
4034struct CitationBuilder {
4040 id: String,
4041 prefix_raw: String,
4042 suffix_raw: String,
4043 mode: CitationMode,
4044}
4045
4046impl CitationBuilder {
4047 fn new(prefix_raw: String, mode: CitationMode) -> Self {
4048 Self {
4049 id: String::new(),
4050 prefix_raw,
4051 suffix_raw: String::new(),
4052 mode,
4053 }
4054 }
4055
4056 fn into_citation(self, note_num: i64) -> Citation {
4057 let prefix = parse_cite_affix_inlines(self.prefix_raw.trim_end(), true);
4058 let suffix = parse_cite_affix_inlines(&self.suffix_raw, false);
4059 Citation {
4060 id: self.id,
4061 prefix,
4062 suffix,
4063 mode: self.mode,
4064 note_num,
4065 hash: 0,
4066 }
4067 }
4068}
4069
4070fn parse_cite_affix_inlines(raw: &str, is_prefix: bool) -> Vec<Inline> {
4082 if raw.is_empty() {
4083 return Vec::new();
4084 }
4085 let trimmed = if is_prefix { raw.trim_start() } else { raw };
4086 if trimmed.is_empty() {
4087 return Vec::new();
4088 }
4089 let leading_space = !is_prefix && trimmed.starts_with([' ', '\t']);
4090 let work = trimmed.trim_start_matches([' ', '\t']);
4091 if work.is_empty() {
4092 return if leading_space {
4093 vec![Inline::Space]
4094 } else {
4095 Vec::new()
4096 };
4097 }
4098 let wrapped = format!("Z {work}");
4099 let inlines = parse_cell_text_inlines(&wrapped);
4100 let mut coalesced = coalesce_inlines(inlines);
4101 if matches!(coalesced.first(), Some(Inline::Str(s)) if s == "Z") {
4103 coalesced.remove(0);
4104 if matches!(coalesced.first(), Some(Inline::Space)) {
4105 coalesced.remove(0);
4106 }
4107 }
4108 if leading_space {
4109 coalesced.insert(0, Inline::Space);
4110 }
4111 coalesced
4112}
4113
4114fn literal_inlines(text: &str) -> Vec<Inline> {
4120 let mut out: Vec<Inline> = Vec::new();
4121 let mut buf = String::new();
4122 for ch in text.chars() {
4123 match ch {
4124 ' ' | '\t' => {
4125 if !buf.is_empty() {
4126 out.push(Inline::Str(std::mem::take(&mut buf)));
4127 }
4128 if !matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4129 out.push(Inline::Space);
4130 }
4131 }
4132 '\n' => {
4133 if !buf.is_empty() {
4134 out.push(Inline::Str(std::mem::take(&mut buf)));
4135 }
4136 if matches!(out.last(), Some(Inline::Space)) {
4137 out.pop();
4138 }
4139 out.push(Inline::SoftBreak);
4140 }
4141 _ => buf.push(ch),
4142 }
4143 }
4144 if !buf.is_empty() {
4145 out.push(Inline::Str(buf));
4146 }
4147 out
4148}
4149
4150fn push_token_inline(
4151 t: &rowan::SyntaxToken<crate::syntax::PanacheLanguage>,
4152 out: &mut Vec<Inline>,
4153) {
4154 match t.kind() {
4155 SyntaxKind::TEXT => push_text(t.text(), out),
4156 SyntaxKind::WHITESPACE => out.push(Inline::Space),
4157 SyntaxKind::NEWLINE => out.push(Inline::SoftBreak),
4158 SyntaxKind::HARD_LINE_BREAK => out.push(Inline::LineBreak),
4159 SyntaxKind::ESCAPED_CHAR => {
4160 let s: String = t.text().chars().skip(1).collect();
4162 out.push(Inline::Str(s));
4163 }
4164 SyntaxKind::NONBREAKING_SPACE => out.push(Inline::Str("\u{a0}".to_string())),
4165 _ => {}
4168 }
4169}
4170
4171fn push_text(text: &str, out: &mut Vec<Inline>) {
4172 let mut buf = String::new();
4173 for ch in text.chars() {
4174 if ch == ' ' || ch == '\t' {
4175 if !buf.is_empty() {
4176 out.push(Inline::Str(std::mem::take(&mut buf)));
4177 }
4178 out.push(Inline::Space);
4179 } else if ch == '\n' {
4180 if !buf.is_empty() {
4181 out.push(Inline::Str(std::mem::take(&mut buf)));
4182 }
4183 out.push(Inline::SoftBreak);
4184 } else {
4185 buf.push(ch);
4186 }
4187 }
4188 if !buf.is_empty() {
4189 out.push(Inline::Str(buf));
4190 }
4191}
4192
4193fn inline_from_node(node: &SyntaxNode) -> Inline {
4194 match node.kind() {
4195 SyntaxKind::EMPHASIS => {
4196 Inline::Emph(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4197 }
4198 SyntaxKind::STRONG => {
4199 Inline::Strong(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4200 }
4201 SyntaxKind::STRIKEOUT => {
4202 Inline::Strikeout(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4203 }
4204 SyntaxKind::SUPERSCRIPT => {
4205 Inline::Superscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4206 }
4207 SyntaxKind::SUBSCRIPT => {
4208 Inline::Subscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
4209 }
4210 SyntaxKind::INLINE_CODE => {
4211 let content: String = node
4212 .children_with_tokens()
4213 .filter_map(|el| el.into_token())
4214 .filter(|t| t.kind() == SyntaxKind::INLINE_CODE_CONTENT)
4215 .map(|t| t.text().to_string())
4216 .collect();
4217 Inline::Code(
4218 extract_attr_from_node(node),
4219 strip_inline_code_padding(&content),
4220 )
4221 }
4222 SyntaxKind::LINK | SyntaxKind::IMAGE_LINK | SyntaxKind::UNRESOLVED_REFERENCE => {
4223 Inline::Unsupported(format!("{:?}", node.kind()))
4230 }
4231 SyntaxKind::AUTO_LINK => autolink_inline(node),
4232 SyntaxKind::INLINE_MATH => math_inline(node, "InlineMath"),
4233 SyntaxKind::DISPLAY_MATH => math_inline(node, "DisplayMath"),
4234 SyntaxKind::LATEX_COMMAND => latex_command_inline(node),
4235 SyntaxKind::BRACKETED_SPAN => bracketed_span_inline(node),
4236 SyntaxKind::INLINE_HTML_SPAN => inline_html_span_inline(node),
4237 SyntaxKind::INLINE_HTML => Inline::RawInline("html".to_string(), node.text().to_string()),
4238 SyntaxKind::FOOTNOTE_REFERENCE => footnote_reference_inline(node),
4239 SyntaxKind::INLINE_FOOTNOTE => inline_footnote_inline(node),
4240 other => Inline::Unsupported(format!("{other:?}")),
4241 }
4242}
4243
4244fn inlines_from_marked(parent: &SyntaxNode) -> Vec<Inline> {
4248 let mut out = Vec::new();
4249 let mut iter = parent.children_with_tokens().peekable();
4250 while let Some(el) = iter.next() {
4251 match el {
4252 NodeOrToken::Token(t) => match t.kind() {
4253 SyntaxKind::EMPHASIS_MARKER
4254 | SyntaxKind::STRONG_MARKER
4255 | SyntaxKind::STRIKEOUT_MARKER
4256 | SyntaxKind::SUPERSCRIPT_MARKER
4257 | SyntaxKind::SUBSCRIPT_MARKER
4258 | SyntaxKind::MARK_MARKER => {}
4259 _ => push_token_inline(&t, &mut out),
4260 },
4261 NodeOrToken::Node(n) => match n.kind() {
4262 SyntaxKind::EMPHASIS_MARKER
4263 | SyntaxKind::STRONG_MARKER
4264 | SyntaxKind::STRIKEOUT_MARKER
4265 | SyntaxKind::SUPERSCRIPT_MARKER
4266 | SyntaxKind::SUBSCRIPT_MARKER
4267 | SyntaxKind::MARK_MARKER => {}
4268 _ if n.kind() == SyntaxKind::LATEX_COMMAND => {
4269 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
4270 }
4271 _ => push_inline_node(&n, &mut out),
4272 },
4273 }
4274 }
4275 out
4276}
4277
4278fn render_link_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
4279 let text_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT);
4280 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
4281 let has_dest_paren = node
4282 .children_with_tokens()
4283 .any(|el| matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::LINK_DEST_START));
4284
4285 if has_dest_paren {
4286 let text = text_node
4287 .as_ref()
4288 .map(|n| coalesce_inlines(inlines_from(n)))
4289 .unwrap_or_default();
4290 let (url, title) = dest_node
4291 .as_ref()
4292 .map(parse_link_dest)
4293 .unwrap_or((String::new(), String::new()));
4294 out.push(Inline::Link(extract_attr_from_node(node), text, url, title));
4295 return;
4296 }
4297
4298 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
4301 let resolved_text_inlines = text_node
4302 .as_ref()
4303 .map(|n| coalesce_inlines(inlines_from(n)))
4304 .unwrap_or_default();
4305 let text_label = text_node
4306 .as_ref()
4307 .map(|n| n.text().to_string())
4308 .unwrap_or_default();
4309
4310 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
4311 Some(rn) => {
4312 let inner = rn.text().to_string();
4313 if inner.is_empty() {
4314 (text_label.clone(), true, String::new())
4315 } else {
4316 (inner.clone(), true, inner)
4317 }
4318 }
4319 None => (text_label.clone(), false, String::new()),
4320 };
4321
4322 if let Some((url, title)) = lookup_ref(&label) {
4323 out.push(Inline::Link(
4324 extract_attr_from_node(node),
4325 resolved_text_inlines,
4326 url,
4327 title,
4328 ));
4329 return;
4330 }
4331
4332 if let Some(id) = lookup_heading_id(&label) {
4333 let url = format!("#{id}");
4334 out.push(Inline::Link(
4335 extract_attr_from_node(node),
4336 resolved_text_inlines,
4337 url,
4338 String::new(),
4339 ));
4340 return;
4341 }
4342
4343 let unresolved_text_inlines = text_node
4352 .as_ref()
4353 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
4354 .unwrap_or_default();
4355 out.push(Inline::Str("[".to_string()));
4356 out.extend(unresolved_text_inlines);
4357 let suffix = if has_second_brackets {
4358 format!("][{second_inner}]")
4359 } else {
4360 "]".to_string()
4361 };
4362 out.push(Inline::Str(suffix));
4363}
4364
4365fn render_image_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
4366 let alt_node = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
4367 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
4368 let has_dest_paren = node.children_with_tokens().any(|el| {
4369 matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::IMAGE_DEST_START
4370 || t.kind() == SyntaxKind::LINK_DEST_START)
4371 });
4372
4373 if has_dest_paren {
4374 let alt = alt_node
4375 .as_ref()
4376 .map(|n| coalesce_inlines(inlines_from(n)))
4377 .unwrap_or_default();
4378 let (url, title) = dest_node
4379 .as_ref()
4380 .map(parse_link_dest)
4381 .unwrap_or((String::new(), String::new()));
4382 out.push(Inline::Image(extract_attr_from_node(node), alt, url, title));
4383 return;
4384 }
4385
4386 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
4387 let alt_inlines = alt_node
4388 .as_ref()
4389 .map(|n| coalesce_inlines(inlines_from(n)))
4390 .unwrap_or_default();
4391 let alt_label = alt_node
4392 .as_ref()
4393 .map(|n| n.text().to_string())
4394 .unwrap_or_default();
4395
4396 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
4397 Some(rn) => {
4398 let inner = rn.text().to_string();
4399 if inner.is_empty() {
4400 (alt_label.clone(), true, String::new())
4401 } else {
4402 (inner.clone(), true, inner)
4403 }
4404 }
4405 None => (alt_label.clone(), false, String::new()),
4406 };
4407
4408 if let Some((url, title)) = lookup_ref(&label) {
4409 out.push(Inline::Image(
4410 extract_attr_from_node(node),
4411 alt_inlines,
4412 url,
4413 title,
4414 ));
4415 return;
4416 }
4417
4418 if let Some(id) = lookup_heading_id(&label) {
4419 let url = format!("#{id}");
4420 out.push(Inline::Image(
4421 extract_attr_from_node(node),
4422 alt_inlines,
4423 url,
4424 String::new(),
4425 ));
4426 return;
4427 }
4428
4429 out.push(Inline::Str("![".to_string()));
4430 out.extend(alt_inlines);
4431 let suffix = if has_second_brackets {
4432 format!("][{second_inner}]")
4433 } else {
4434 "]".to_string()
4435 };
4436 out.push(Inline::Str(suffix));
4437}
4438
/// Normalizes the raw content of an inline code span.
///
/// Each `\n` is replaced by a single space, then leading and trailing
/// whitespace is trimmed. Interior runs of spaces are left untouched.
fn strip_inline_code_padding(s: &str) -> String {
    let single_line = s.replace('\n', " ");
    single_line.trim().to_string()
}
4447
4448fn math_inline(node: &SyntaxNode, kind: &'static str) -> Inline {
4449 let mut content = String::new();
4450 for el in node.children_with_tokens() {
4451 if let NodeOrToken::Token(t) = el {
4452 match t.kind() {
4453 SyntaxKind::INLINE_MATH_MARKER | SyntaxKind::DISPLAY_MATH_MARKER => {}
4454 _ => content.push_str(t.text()),
4455 }
4456 }
4457 }
4458 Inline::Math(kind, content)
4459}
4460
4461fn autolink_inline(node: &SyntaxNode) -> Inline {
4462 let mut url = String::new();
4463 for el in node.children_with_tokens() {
4464 if let NodeOrToken::Token(t) = el
4465 && t.kind() == SyntaxKind::TEXT
4466 {
4467 url.push_str(t.text());
4468 }
4469 }
4470 let is_email = !url.contains("://") && !url.starts_with("mailto:") && url.contains('@');
4473 if is_email {
4474 let attr = Attr {
4475 id: String::new(),
4476 classes: vec!["email".to_string()],
4477 kvs: Vec::new(),
4478 };
4479 let dest = format!("mailto:{url}");
4480 return Inline::Link(attr, vec![Inline::Str(url)], dest, String::new());
4481 }
4482 if !is_known_uri_scheme(&url) {
4486 return Inline::RawInline("html".to_string(), node.text().to_string());
4487 }
4488 let attr = Attr {
4489 id: String::new(),
4490 classes: vec!["uri".to_string()],
4491 kvs: Vec::new(),
4492 };
4493 Inline::Link(attr, vec![Inline::Str(url.clone())], url, String::new())
4494}
4495
/// Reports whether `url` starts with a scheme listed in
/// [`PANDOC_KNOWN_SCHEMES`].
///
/// The scheme is everything before the first `:`, compared after ASCII
/// lower-casing. A string without `:` has no scheme and yields `false`.
fn is_known_uri_scheme(url: &str) -> bool {
    url.split_once(':').is_some_and(|(scheme, _rest)| {
        let lowered = scheme.to_ascii_lowercase();
        PANDOC_KNOWN_SCHEMES
            .binary_search(&lowered.as_str())
            .is_ok()
    })
}

/// Lower-case URI schemes accepted by `is_known_uri_scheme`.
///
/// The lookup uses `binary_search`, so the entries MUST stay sorted in
/// ascending byte order (`-` < `.` < digits < letters) when the list is edited.
#[rustfmt::skip]
const PANDOC_KNOWN_SCHEMES: &[&str] = &[
    "aaa", "aaas", "about", "acap", "acct", "acr",
    "adiumxtra", "afp", "afs", "aim", "appdata", "apt",
    "attachment", "aw", "barion", "beshare", "bitcoin", "blob",
    "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension",
    "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid",
    "cvs", "data", "dav", "dict", "dis", "dlna-playcontainer",
    "dlna-playsingle", "dns", "dntp", "doi", "dtn", "dvb",
    "ed2k", "example", "facetime", "fax", "feed", "feedready",
    "file", "filesystem", "finger", "fish", "ftp", "gemini",
    "geo", "gg", "git", "gizmoproject", "go", "gopher",
    "graph", "gtalk", "h323", "ham", "hcp", "http",
    "https", "hxxp", "hxxps", "hydrazone", "iax", "icap",
    "icon", "im", "imap", "info", "iotdisco", "ipn",
    "ipp", "ipps", "irc", "irc6", "ircs", "iris",
    "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "isbn", "isostore",
    "itms", "jabber", "jar", "javascript", "jms", "keyparc",
    "lastfm", "ldap", "ldaps", "lvlt", "magnet", "mailserver",
    "mailto", "maps", "market", "message", "mid", "mms",
    "modem", "mongodb", "moz", "ms-access", "ms-browser-extension", "ms-drive-to",
    "ms-enrollment", "ms-excel", "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath",
    "ms-media-stream-id", "ms-officeapp", "ms-powerpoint", "ms-project", "ms-publisher", "ms-search-repair",
    "ms-secondary-screen-controller", "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode", "ms-settings-bluetooth", "ms-settings-camera",
    "ms-settings-cellular", "ms-settings-cloudstorage", "ms-settings-connectabledevices", "ms-settings-displays-topology", "ms-settings-emailandaccounts", "ms-settings-language",
    "ms-settings-location", "ms-settings-lock", "ms-settings-nfctransactions", "ms-settings-notifications", "ms-settings-power", "ms-settings-privacy",
    "ms-settings-proximity", "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace", "ms-spd", "ms-sttoverlay",
    "ms-transit-to", "ms-virtualtouchpad", "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd",
    "ms-word", "msnim", "msrp", "msrps", "mtqp", "mumble",
    "mupdate", "mvn", "news", "nfs", "ni", "nih",
    "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd",
    "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform",
    "pmid", "pop", "pres", "prospero", "proxy", "psyc",
    "pwid", "qb", "query", "redis", "rediss", "reload",
    "res", "resource", "rmi", "rsync", "rtmfp", "rtmp",
    "rtsp", "rtsps", "rtspu", "secondlife", "service", "session",
    "sftp", "sgn", "shttp", "sieve", "sip", "sips",
    "skype", "smb", "sms", "smtp", "snews", "snmp",
    "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam",
    "stun", "stuns", "submit", "svn", "tag", "teamspeak",
    "tel", "teliaeid", "telnet", "tftp", "things", "thismessage",
    "tip", "tn3270", "tool", "turn", "turns", "tv",
    "udp", "unreal", "urn", "ut2004", "v-event", "vemmi",
    "ventrilo", "videotex", "view-source", "vnc", "wais", "webcal",
    "wpid", "ws", "wss", "wtai", "wyciwyg", "xcon",
    "xcon-userid", "xfire", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri",
    "ymsgr", "z39.50", "z39.50r", "z39.50s",
];
4557
4558fn footnote_reference_inline(node: &SyntaxNode) -> Inline {
4559 let Some(label) = footnote_label(node) else {
4560 return Inline::Unsupported("FOOTNOTE_REFERENCE".to_string());
4561 };
4562 let blocks = REFS_CTX.with(|c| {
4563 c.borrow()
4564 .footnotes
4565 .get(&label)
4566 .map(|bs| bs.iter().map(clone_block).collect::<Vec<_>>())
4567 });
4568 match blocks {
4569 Some(bs) => Inline::Note(bs),
4570 None => Inline::Str(node.text().to_string()),
4573 }
4574}
4575
4576fn inline_footnote_inline(node: &SyntaxNode) -> Inline {
4577 let inlines = coalesce_inlines(inlines_from(node));
4578 if inlines.is_empty() {
4579 Inline::Note(Vec::new())
4580 } else {
4581 Inline::Note(vec![Block::Para(inlines)])
4582 }
4583}
4584
4585fn parse_link_dest(node: &SyntaxNode) -> (String, String) {
4586 let raw = node.text().to_string();
4590 let trimmed = raw.trim();
4591 if let Some(rest) = trimmed.strip_prefix('<')
4594 && let Some(end) = rest.find('>')
4595 {
4596 let url = &rest[..end];
4597 let after = rest[end + 1..].trim();
4598 let title = parse_dest_title(after);
4599 return (escape_link_dest(url), title);
4600 }
4601 let bytes = trimmed.as_bytes();
4605 let mut url_end = trimmed.len();
4606 let mut i = 0;
4607 while i < bytes.len() {
4608 if matches!(bytes[i], b' ' | b'\t' | b'\n') {
4609 let mut j = i;
4610 while j < bytes.len() && matches!(bytes[j], b' ' | b'\t' | b'\n') {
4611 j += 1;
4612 }
4613 if j < bytes.len() && matches!(bytes[j], b'"' | b'\'' | b'(') {
4614 url_end = i;
4615 break;
4616 }
4617 i = j;
4618 } else {
4619 i += 1;
4620 }
4621 }
4622 let url_raw = &trimmed[..url_end];
4623 let title = parse_dest_title(trimmed[url_end..].trim());
4624 (escape_link_dest(url_raw), title)
4625}
4626
/// Percent-encodes the characters of a link destination that need escaping.
///
/// Whitespace (as defined by `char::is_whitespace`) and the characters
/// ``< > | " { } [ ] ^ ` `` are replaced by `%XX` for each of their UTF-8
/// bytes, in upper-case hex. Everything else is copied through unchanged.
fn escape_link_dest(s: &str) -> String {
    fn must_escape(ch: char) -> bool {
        ch.is_whitespace() || "<>|\"{}[]^`".contains(ch)
    }

    let mut escaped = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        if !must_escape(ch) {
            escaped.push(ch);
            continue;
        }
        for byte in ch.encode_utf8(&mut utf8).bytes() {
            escaped.push_str(&format!("%{byte:02X}"));
        }
    }
    escaped
}
4649
/// Extracts the title from the text that follows a link destination.
///
/// `s` must start with an opening delimiter: `"` or `'` (closed by the same
/// character) or `(` (closed by `)`). The title is everything between that
/// opener and the LAST occurrence of the closer, so trailing text after the
/// closer is ignored and inner closers are kept. Returns an empty string when
/// `s` is empty, starts with anything else, or has no closer.
fn parse_dest_title(s: &str) -> String {
    // The opener decides the closer; the opener itself is never needed again,
    // so no separate "does `s` start with the opener" re-check is required.
    let closer = match s.chars().next() {
        Some('"') => '"',
        Some('\'') => '\'',
        Some('(') => ')',
        _ => return String::new(),
    };
    // All three openers are one byte long, so index 1 is a char boundary.
    let body = &s[1..];
    body.rfind(closer)
        .map_or_else(String::new, |end| body[..end].to_string())
}
4669
4670fn coalesce_inlines(input: Vec<Inline>) -> Vec<Inline> {
4673 coalesce_inlines_inner(input, true)
4674}
4675
4676fn coalesce_inlines_keep_edges(input: Vec<Inline>) -> Vec<Inline> {
4681 coalesce_inlines_inner(input, false)
4682}
4683
4684fn coalesce_inlines_inner(input: Vec<Inline>, trim_edges: bool) -> Vec<Inline> {
4685 let mut out: Vec<Inline> = Vec::with_capacity(input.len());
4686 for inline in input {
4687 if let Inline::Str(s) = inline {
4688 if let Some(Inline::Str(prev)) = out.last_mut() {
4689 prev.push_str(&s);
4690 } else {
4691 out.push(Inline::Str(s));
4692 }
4693 } else if let Inline::Space = inline {
4694 if matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4697 continue;
4698 }
4699 out.push(Inline::Space);
4700 } else if let Inline::SoftBreak = inline {
4701 if matches!(out.last(), Some(Inline::Space)) {
4704 out.pop();
4705 }
4706 out.push(Inline::SoftBreak);
4707 } else {
4708 out.push(inline);
4709 }
4710 }
4711 if trim_edges {
4712 while matches!(out.first(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4715 out.remove(0);
4716 }
4717 while matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
4718 out.pop();
4719 }
4720 }
4721 for inline in out.iter_mut() {
4725 if let Inline::Str(s) = inline {
4726 let mut t = smart_intraword_apostrophe(s);
4727 t = smart_dashes_and_ellipsis(&t);
4728 *s = t;
4729 }
4730 }
4731 let out = smart_quote_pairs(out);
4732 apply_abbreviations(out)
4733}
4734
/// Abbreviations recognized by `matches_abbreviation_suffix`. Matching is
/// case-sensitive and every entry ends with a period.
const PANDOC_ABBREVIATIONS: &[&str] = &[
    "Apr.", "Aug.", "Bros.", "Capt.", "Co.", "Corp.", "Dec.", "Dr.", "Feb.", "Fr.", "Gen.", "Gov.",
    "Hon.", "Inc.", "Jan.", "Jr.", "Jul.", "Jun.", "Ltd.", "M.A.", "M.D.", "Mar.", "Mr.", "Mrs.",
    "Ms.", "No.", "Nov.", "Oct.", "Ph.D.", "Pres.", "Prof.", "Rep.", "Rev.", "Sen.", "Sep.",
    "Sept.", "Sgt.", "Sr.", "St.", "aet.", "aetat.", "al.", "bk.", "c.", "cf.", "ch.", "chap.",
    "chs.", "col.", "cp.", "d.", "e.g.", "ed.", "eds.", "esp.", "f.", "fasc.", "ff.", "fig.",
    "fl.", "fol.", "fols.", "i.e.", "ill.", "incl.", "n.", "n.b.", "nn.", "p.", "pp.", "pt.",
    "q.v.", "s.v.", "s.vv.", "saec.", "sec.", "univ.", "viz.", "vol.", "vs.",
];

/// Reports whether `s` ends with one of [`PANDOC_ABBREVIATIONS`] as a
/// standalone word.
///
/// The abbreviation counts when it is the whole string, or when the character
/// just before it is neither alphanumeric nor a period (so `"(Mr."` matches
/// but `"xMr."` and `"a.c."` do not).
fn matches_abbreviation_suffix(s: &str) -> bool {
    PANDOC_ABBREVIATIONS.iter().any(|&abbr| {
        s.strip_suffix(abbr)
            .is_some_and(|before| match before.chars().next_back() {
                None => true,
                Some(ch) => !(ch.is_alphanumeric() || ch == '.'),
            })
    })
}
4764
4765fn apply_abbreviations(inlines: Vec<Inline>) -> Vec<Inline> {
4775 let inlines: Vec<Inline> = inlines
4776 .into_iter()
4777 .map(|inline| match inline {
4778 Inline::Quoted(kind, content) => Inline::Quoted(kind, apply_abbreviations(content)),
4779 other => other,
4780 })
4781 .collect();
4782 let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
4783 let mut iter = inlines.into_iter().peekable();
4784 while let Some(inline) = iter.next() {
4785 if let Inline::Str(ref s) = inline
4786 && matches_abbreviation_suffix(s)
4787 && matches!(iter.peek(), Some(Inline::Space))
4788 {
4789 iter.next();
4791 let Inline::Str(mut new_s) = inline else {
4792 unreachable!()
4793 };
4794 new_s.push('\u{a0}');
4795 if let Some(Inline::Str(_)) = iter.peek()
4797 && let Some(Inline::Str(next_s)) = iter.next()
4798 {
4799 new_s.push_str(&next_s);
4800 }
4801 out.push(Inline::Str(new_s));
4802 } else {
4803 out.push(inline);
4804 }
4805 }
4806 out
4807}
4808
4809fn smart_quote_pairs(inlines: Vec<Inline>) -> Vec<Inline> {
4810 fn is_boundary(prev: Option<&Inline>) -> bool {
4818 match prev {
4819 None => true,
4820 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4821 Some(Inline::Str(s)) => s.chars().last().is_some_and(|c| !c.is_alphanumeric()),
4822 _ => false,
4823 }
4824 }
4825 let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
4826 let n = inlines.len();
4827 let mut consumed = vec![false; n];
4828 for i in 0..n {
4829 if consumed[i] {
4830 continue;
4831 }
4832 let Inline::Str(s) = &inlines[i] else {
4834 out.push(clone_inline(&inlines[i]));
4835 consumed[i] = true;
4836 continue;
4837 };
4838 let first = s.chars().next();
4839 let quote = match first {
4840 Some('"') => Some('"'),
4841 Some('\'') => Some('\''),
4842 _ => None,
4843 };
4844 let prev_is_boundary = is_boundary(out.last());
4850 let str_has_more = s.chars().count() > 1;
4851 let next_char_is_word = s.chars().nth(1).is_some_and(|c| !c.is_whitespace());
4852 let next_is_markup_atom = matches!(
4853 inlines.get(i + 1),
4854 Some(
4855 Inline::Emph(_)
4856 | Inline::Strong(_)
4857 | Inline::Strikeout(_)
4858 | Inline::Superscript(_)
4859 | Inline::Subscript(_)
4860 | Inline::Code(_, _)
4861 )
4862 );
4863 let attaches =
4864 (str_has_more && next_char_is_word) || (!str_has_more && next_is_markup_atom);
4865 if let Some(q) = quote
4866 && prev_is_boundary
4867 && attaches
4868 {
4869 if let Some(close_idx) = find_matching_close(&inlines, i, q, &consumed) {
4871 let kind = if q == '"' {
4875 "DoubleQuote"
4876 } else {
4877 "SingleQuote"
4878 };
4879 let mut content: Vec<Inline> = Vec::new();
4880 for j in i..=close_idx {
4881 if consumed[j] {
4882 continue;
4883 }
4884 let inline = &inlines[j];
4885 if j == i && j == close_idx {
4886 if let Inline::Str(s) = inline {
4888 let mut chars: Vec<char> = s.chars().collect();
4889 if chars.len() >= 2 {
4890 chars.remove(0);
4891 chars.pop();
4892 }
4893 let stripped: String = chars.into_iter().collect();
4894 if !stripped.is_empty() {
4895 content.push(Inline::Str(stripped));
4896 }
4897 }
4898 } else if j == i {
4899 if let Inline::Str(s) = inline {
4900 let stripped: String = s.chars().skip(1).collect();
4901 if !stripped.is_empty() {
4902 content.push(Inline::Str(stripped));
4903 }
4904 }
4905 } else if j == close_idx {
4906 if let Inline::Str(s) = inline {
4907 let mut stripped: String = s.chars().collect();
4908 stripped.pop();
4909 if !stripped.is_empty() {
4910 content.push(Inline::Str(stripped));
4911 }
4912 }
4913 } else {
4914 content.push(clone_inline(inline));
4915 }
4916 consumed[j] = true;
4917 }
4918 out.push(Inline::Quoted(kind, content));
4919 continue;
4920 }
4921 }
4922 out.push(clone_inline(&inlines[i]));
4923 consumed[i] = true;
4924 }
4925 out
4926}
4927
4928fn find_matching_close(
4929 inlines: &[Inline],
4930 open_idx: usize,
4931 quote: char,
4932 consumed: &[bool],
4933) -> Option<usize> {
4934 if let Inline::Str(s) = &inlines[open_idx]
4936 && s.chars().count() >= 3
4937 && s.ends_with(quote)
4938 {
4939 let next = inlines.get(open_idx + 1);
4941 let after_is_boundary = match next {
4942 None => true,
4943 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4944 Some(Inline::Str(s)) => s.chars().next().is_some_and(|c| !c.is_alphanumeric()),
4945 _ => false,
4946 };
4947 if after_is_boundary {
4948 return Some(open_idx);
4949 }
4950 }
4951 let n = inlines.len();
4954 let mut j = open_idx + 1;
4955 while j < n {
4956 if consumed[j] {
4957 return None;
4958 }
4959 match &inlines[j] {
4960 Inline::Str(s) => {
4961 if s.ends_with(quote) {
4962 let next = inlines.get(j + 1);
4963 let after_is_boundary = match next {
4964 None => true,
4965 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4966 Some(Inline::Str(s)) => {
4967 s.chars().next().is_some_and(|c| !c.is_alphanumeric())
4968 }
4969 _ => false,
4970 };
4971 if after_is_boundary {
4972 return Some(j);
4973 }
4974 }
4975 }
4976 Inline::Space | Inline::SoftBreak | Inline::LineBreak => {}
4977 _ => {}
4979 }
4980 j += 1;
4981 if j - open_idx > 32 {
4983 return None;
4984 }
4985 }
4986 None
4987}
4988
4989fn clone_inline(inline: &Inline) -> Inline {
4990 match inline {
4991 Inline::Str(s) => Inline::Str(s.clone()),
4992 Inline::Space => Inline::Space,
4993 Inline::SoftBreak => Inline::SoftBreak,
4994 Inline::LineBreak => Inline::LineBreak,
4995 Inline::Emph(c) => Inline::Emph(c.iter().map(clone_inline).collect()),
4996 Inline::Strong(c) => Inline::Strong(c.iter().map(clone_inline).collect()),
4997 Inline::Strikeout(c) => Inline::Strikeout(c.iter().map(clone_inline).collect()),
4998 Inline::Superscript(c) => Inline::Superscript(c.iter().map(clone_inline).collect()),
4999 Inline::Subscript(c) => Inline::Subscript(c.iter().map(clone_inline).collect()),
5000 Inline::Code(a, s) => Inline::Code(a.clone(), s.clone()),
5001 Inline::Link(a, t, u, ti) => Inline::Link(
5002 a.clone(),
5003 t.iter().map(clone_inline).collect(),
5004 u.clone(),
5005 ti.clone(),
5006 ),
5007 Inline::Image(a, t, u, ti) => Inline::Image(
5008 a.clone(),
5009 t.iter().map(clone_inline).collect(),
5010 u.clone(),
5011 ti.clone(),
5012 ),
5013 Inline::Math(k, c) => Inline::Math(k, c.clone()),
5014 Inline::Span(a, c) => Inline::Span(a.clone(), c.iter().map(clone_inline).collect()),
5015 Inline::RawInline(f, c) => Inline::RawInline(f.clone(), c.clone()),
5016 Inline::Quoted(k, c) => Inline::Quoted(k, c.iter().map(clone_inline).collect()),
5017 Inline::Note(blocks) => Inline::Note(blocks.iter().map(clone_block).collect()),
5018 Inline::Cite(citations, text) => Inline::Cite(
5019 citations
5020 .iter()
5021 .map(|c| Citation {
5022 id: c.id.clone(),
5023 prefix: c.prefix.iter().map(clone_inline).collect(),
5024 suffix: c.suffix.iter().map(clone_inline).collect(),
5025 mode: c.mode,
5026 note_num: c.note_num,
5027 hash: c.hash,
5028 })
5029 .collect(),
5030 text.iter().map(clone_inline).collect(),
5031 ),
5032 Inline::Unsupported(s) => Inline::Unsupported(s.clone()),
5033 }
5034}
5035
5036fn clone_block(b: &Block) -> Block {
5037 match b {
5038 Block::Para(c) => Block::Para(c.iter().map(clone_inline).collect()),
5039 Block::Plain(c) => Block::Plain(c.iter().map(clone_inline).collect()),
5040 Block::Header(lvl, a, c) => {
5041 Block::Header(*lvl, a.clone(), c.iter().map(clone_inline).collect())
5042 }
5043 Block::BlockQuote(blocks) => Block::BlockQuote(blocks.iter().map(clone_block).collect()),
5044 Block::CodeBlock(a, s) => Block::CodeBlock(a.clone(), s.clone()),
5045 Block::HorizontalRule => Block::HorizontalRule,
5046 Block::BulletList(items) => Block::BulletList(
5047 items
5048 .iter()
5049 .map(|item| item.iter().map(clone_block).collect())
5050 .collect(),
5051 ),
5052 Block::OrderedList(start, style, delim, items) => Block::OrderedList(
5053 *start,
5054 style,
5055 delim,
5056 items
5057 .iter()
5058 .map(|item| item.iter().map(clone_block).collect())
5059 .collect(),
5060 ),
5061 Block::RawBlock(f, c) => Block::RawBlock(f.clone(), c.clone()),
5062 Block::Table(_) => Block::Unsupported("Table".to_string()),
5063 Block::Div(a, blocks) => Block::Div(a.clone(), blocks.iter().map(clone_block).collect()),
5064 Block::LineBlock(lines) => Block::LineBlock(
5065 lines
5066 .iter()
5067 .map(|line| line.iter().map(clone_inline).collect())
5068 .collect(),
5069 ),
5070 Block::DefinitionList(items) => Block::DefinitionList(
5071 items
5072 .iter()
5073 .map(|(term, defs)| {
5074 (
5075 term.iter().map(clone_inline).collect(),
5076 defs.iter()
5077 .map(|d| d.iter().map(clone_block).collect())
5078 .collect(),
5079 )
5080 })
5081 .collect(),
5082 ),
5083 Block::Figure(a, caption, body) => Block::Figure(
5084 a.clone(),
5085 caption.iter().map(clone_block).collect(),
5086 body.iter().map(clone_block).collect(),
5087 ),
5088 Block::Unsupported(s) => Block::Unsupported(s.clone()),
5089 }
5090}
5091
/// Replaces ASCII dash and dot runs with typographic characters.
///
/// Scanning left to right, `---` becomes an em dash (U+2014), `--` an en dash
/// (U+2013) and `...` an ellipsis (U+2026). Longer runs are consumed greedily
/// in that priority, so `----` yields an em dash followed by `-`. All other
/// text is copied unchanged.
fn smart_dashes_and_ellipsis(s: &str) -> String {
    // Ordered by priority: the three-dash form must be tried before two.
    const REPLACEMENTS: [(&str, char); 3] = [
        ("---", '\u{2014}'),
        ("--", '\u{2013}'),
        ("...", '\u{2026}'),
    ];

    if !s.contains(['-', '.']) {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    'scan: while let Some(&lead) = rest.as_bytes().first() {
        for (pattern, replacement) in REPLACEMENTS {
            if let Some(tail) = rest.strip_prefix(pattern) {
                out.push(replacement);
                rest = tail;
                continue 'scan;
            }
        }
        // No pattern here: copy one whole character, however many bytes.
        let (ch, tail) = rest.split_at(utf8_char_len(lead));
        out.push_str(ch);
        rest = tail;
    }
    out
}

/// Returns the byte length of the UTF-8 sequence introduced by lead byte `b`.
///
/// Bytes below `0xC0` (ASCII, and continuation bytes if one is passed in)
/// report 1.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0xbf => 1,
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        _ => 4,
    }
}
5137
/// Converts apostrophes inside words to the typographic form.
///
/// A `'` becomes U+2019 only when the characters immediately before and after
/// it in `s` are both word characters (see `is_word_char`). Apostrophes at the
/// start or end of the string, or next to punctuation, are left as they are.
fn smart_intraword_apostrophe(s: &str) -> String {
    if !s.contains('\'') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    // The neighbour test always looks at the ORIGINAL previous character.
    let mut before: Option<char> = None;
    let mut upcoming = s.chars().peekable();
    while let Some(ch) = upcoming.next() {
        let inside_word = ch == '\''
            && before.is_some_and(is_word_char)
            && upcoming.peek().copied().is_some_and(is_word_char);
        out.push(if inside_word { '\u{2019}' } else { ch });
        before = Some(ch);
    }
    out
}

/// A word character, for apostrophe handling, is any alphanumeric character.
fn is_word_char(c: char) -> bool {
    char::is_alphanumeric(c)
}
5163
5164fn inlines_to_plaintext(inlines: &[Inline]) -> String {
5165 let mut s = String::new();
5166 for i in inlines {
5167 match i {
5168 Inline::Str(t) => s.push_str(t),
5169 Inline::Space | Inline::SoftBreak => s.push(' '),
5170 Inline::LineBreak => s.push(' '),
5171 Inline::Emph(children)
5172 | Inline::Strong(children)
5173 | Inline::Strikeout(children)
5174 | Inline::Superscript(children)
5175 | Inline::Subscript(children) => s.push_str(&inlines_to_plaintext(children)),
5176 Inline::Code(_, c) => s.push_str(c),
5177 Inline::Link(_, alt, _, _) | Inline::Image(_, alt, _, _) => {
5178 s.push_str(&inlines_to_plaintext(alt))
5179 }
5180 Inline::Math(_, c) => s.push_str(c),
5181 Inline::Span(_, children) => s.push_str(&inlines_to_plaintext(children)),
5182 Inline::RawInline(_, _) => {}
5183 Inline::Quoted(_, children) => s.push_str(&inlines_to_plaintext(children)),
5184 Inline::Note(_) => {}
5185 Inline::Cite(_, text) => s.push_str(&inlines_to_plaintext(text)),
5186 Inline::Unsupported(_) => {}
5187 }
5188 }
5189 s
5190}
5191
/// Derives an identifier slug from heading text.
///
/// Characters are lower-cased. Alphanumerics, `_`, `-` and `.` are kept and
/// all other non-whitespace characters are dropped. A whitespace character
/// becomes `-` unless the slug is still empty or a `-` was just written.
/// Trailing `-` characters are removed at the end.
fn pandoc_slugify(text: &str) -> String {
    let mut slug = String::new();
    // Tracks whether the most recently written character was a dash; dropped
    // characters deliberately leave it untouched.
    let mut last_was_dash = false;
    for ch in text.chars() {
        if ch.is_whitespace() {
            if !(slug.is_empty() || last_was_dash) {
                slug.push('-');
                last_was_dash = true;
            }
        } else {
            let kept = ch
                .to_lowercase()
                .filter(|c| c.is_alphanumeric() || matches!(*c, '_' | '-' | '.'));
            for lower in kept {
                slug.push(lower);
                last_was_dash = lower == '-';
            }
        }
    }
    let trimmed_len = slug.trim_end_matches('-').len();
    slug.truncate(trimmed_len);
    slug
}
5217
5218impl Attr {
5219 fn with_id(id: String) -> Self {
5220 Self {
5221 id,
5222 classes: Vec::new(),
5223 kvs: Vec::new(),
5224 }
5225 }
5226}
5227
5228fn write_block(b: &Block, out: &mut String) {
5231 match b {
5232 Block::Para(inlines) => {
5233 out.push_str("Para [");
5234 write_inline_list(inlines, out);
5235 out.push_str(" ]");
5236 }
5237 Block::Plain(inlines) => {
5238 out.push_str("Plain [");
5239 write_inline_list(inlines, out);
5240 out.push_str(" ]");
5241 }
5242 Block::Header(level, attr, inlines) => {
5243 out.push_str(&format!("Header {level} ("));
5244 write_attr(attr, out);
5245 out.push_str(") [");
5246 write_inline_list(inlines, out);
5247 out.push_str(" ]");
5248 }
5249 Block::BlockQuote(blocks) => {
5250 out.push_str("BlockQuote [");
5251 write_block_list(blocks, out);
5252 out.push_str(" ]");
5253 }
5254 Block::CodeBlock(attr, content) => {
5255 out.push_str("CodeBlock (");
5256 write_attr(attr, out);
5257 out.push_str(") ");
5258 write_haskell_string(content, out);
5259 }
5260 Block::HorizontalRule => out.push_str("HorizontalRule"),
5261 Block::BulletList(items) => {
5262 out.push_str("BulletList [");
5263 for (i, item) in items.iter().enumerate() {
5264 if i > 0 {
5265 out.push(',');
5266 }
5267 out.push_str(" [");
5268 write_block_list(item, out);
5269 out.push_str(" ]");
5270 }
5271 out.push_str(" ]");
5272 }
5273 Block::OrderedList(start, style, delim, items) => {
5274 out.push_str(&format!("OrderedList ( {start} , {style} , {delim} ) ["));
5275 for (i, item) in items.iter().enumerate() {
5276 if i > 0 {
5277 out.push(',');
5278 }
5279 out.push_str(" [");
5280 write_block_list(item, out);
5281 out.push_str(" ]");
5282 }
5283 out.push_str(" ]");
5284 }
5285 Block::RawBlock(format, content) => {
5286 out.push_str("RawBlock ( Format ");
5287 write_haskell_string(format, out);
5288 out.push_str(" ) ");
5289 write_haskell_string(content, out);
5290 }
5291 Block::Table(data) => {
5292 write_table(data, out);
5293 }
5294 Block::Div(attr, blocks) => {
5295 out.push_str("Div (");
5296 write_attr(attr, out);
5297 out.push_str(") [");
5298 write_block_list(blocks, out);
5299 out.push_str(" ]");
5300 }
5301 Block::LineBlock(lines) => {
5302 out.push_str("LineBlock [");
5303 for (i, line) in lines.iter().enumerate() {
5304 if i > 0 {
5305 out.push(',');
5306 }
5307 out.push_str(" [");
5308 write_inline_list(line, out);
5309 out.push_str(" ]");
5310 }
5311 out.push_str(" ]");
5312 }
5313 Block::DefinitionList(items) => {
5314 out.push_str("DefinitionList [");
5315 for (i, (term, defs)) in items.iter().enumerate() {
5316 if i > 0 {
5317 out.push(',');
5318 }
5319 out.push_str(" ( [");
5320 write_inline_list(term, out);
5321 out.push_str(" ] , [");
5322 for (j, def) in defs.iter().enumerate() {
5323 if j > 0 {
5324 out.push(',');
5325 }
5326 out.push_str(" [");
5327 write_block_list(def, out);
5328 out.push_str(" ]");
5329 }
5330 out.push_str(" ] )");
5331 }
5332 out.push_str(" ]");
5333 }
5334 Block::Figure(attr, caption, body) => {
5335 out.push_str("Figure (");
5336 write_attr(attr, out);
5337 out.push_str(") ( Caption Nothing [");
5338 write_block_list(caption, out);
5339 out.push_str(" ] ) [");
5340 write_block_list(body, out);
5341 out.push_str(" ]");
5342 }
5343 Block::Unsupported(name) => {
5344 out.push_str(&format!("Unsupported {name:?}"));
5345 }
5346 }
5347}
5348
5349fn write_table(data: &TableData, out: &mut String) {
5350 out.push_str("Table (");
5351 write_attr(&data.attr, out);
5352 out.push_str(") ( Caption Nothing [");
5353 if !data.caption.is_empty() {
5354 out.push_str(" Plain [");
5355 write_inline_list(&data.caption, out);
5356 out.push_str(" ]");
5357 }
5358 out.push_str(" ] ) [");
5359 for (i, align) in data.aligns.iter().enumerate() {
5360 if i > 0 {
5361 out.push(',');
5362 }
5363 let width = data.widths.get(i).copied().unwrap_or(None);
5364 match width {
5365 None => out.push_str(&format!(" ( {align} , ColWidthDefault )")),
5366 Some(w) => out.push_str(&format!(" ( {align} , ColWidth {} )", show_double(w))),
5367 }
5368 }
5369 out.push_str(" ] ( TableHead ( \"\" , [ ] , [ ] ) [");
5370 for (i, row) in data.head_rows.iter().enumerate() {
5371 if i > 0 {
5372 out.push(',');
5373 }
5374 out.push(' ');
5375 write_table_row(row, out);
5376 }
5377 out.push_str(" ] ) [ TableBody ( \"\" , [ ] , [ ] ) ( RowHeadColumns 0 ) [ ] [");
5378 for (i, row) in data.body_rows.iter().enumerate() {
5379 if i > 0 {
5380 out.push(',');
5381 }
5382 out.push(' ');
5383 write_table_row(row, out);
5384 }
5385 out.push_str(" ] ] ( TableFoot ( \"\" , [ ] , [ ] ) [");
5386 for (i, row) in data.foot_rows.iter().enumerate() {
5387 if i > 0 {
5388 out.push(',');
5389 }
5390 out.push(' ');
5391 write_table_row(row, out);
5392 }
5393 out.push_str(" ] )");
5394}
5395
5396fn write_table_row(cells: &[GridCell], out: &mut String) {
5397 out.push_str("Row ( \"\" , [ ] , [ ] ) [");
5398 for (i, cell) in cells.iter().enumerate() {
5399 if i > 0 {
5400 out.push(',');
5401 }
5402 out.push_str(&format!(
5403 " Cell ( \"\" , [ ] , [ ] ) AlignDefault ( RowSpan {} ) ( ColSpan {} ) [",
5404 cell.row_span, cell.col_span
5405 ));
5406 if !cell.blocks.is_empty() {
5407 write_block_list(&cell.blocks, out);
5408 }
5409 out.push_str(" ]");
5410 }
5411 out.push_str(" ]");
5412}
5413
5414fn write_block_list(blocks: &[Block], out: &mut String) {
5415 for (i, b) in blocks.iter().enumerate() {
5416 if i > 0 {
5417 out.push(',');
5418 }
5419 out.push(' ');
5420 write_block(b, out);
5421 }
5422}
5423
5424fn write_inline_list(inlines: &[Inline], out: &mut String) {
5425 for (i, inline) in inlines.iter().enumerate() {
5426 if i > 0 {
5427 out.push(',');
5428 }
5429 out.push(' ');
5430 write_inline(inline, out);
5431 }
5432}
5433
5434fn write_inline(inline: &Inline, out: &mut String) {
5435 match inline {
5436 Inline::Str(s) => {
5437 out.push_str("Str ");
5438 write_haskell_string(s, out);
5439 }
5440 Inline::Space => out.push_str("Space"),
5441 Inline::SoftBreak => out.push_str("SoftBreak"),
5442 Inline::LineBreak => out.push_str("LineBreak"),
5443 Inline::Emph(children) => {
5444 out.push_str("Emph [");
5445 write_inline_list(children, out);
5446 out.push_str(" ]");
5447 }
5448 Inline::Strong(children) => {
5449 out.push_str("Strong [");
5450 write_inline_list(children, out);
5451 out.push_str(" ]");
5452 }
5453 Inline::Strikeout(children) => {
5454 out.push_str("Strikeout [");
5455 write_inline_list(children, out);
5456 out.push_str(" ]");
5457 }
5458 Inline::Superscript(children) => {
5459 out.push_str("Superscript [");
5460 write_inline_list(children, out);
5461 out.push_str(" ]");
5462 }
5463 Inline::Subscript(children) => {
5464 out.push_str("Subscript [");
5465 write_inline_list(children, out);
5466 out.push_str(" ]");
5467 }
5468 Inline::Code(attr, content) => {
5469 out.push_str("Code (");
5470 write_attr(attr, out);
5471 out.push_str(") ");
5472 write_haskell_string(content, out);
5473 }
5474 Inline::Link(attr, text, url, title) => {
5475 out.push_str("Link (");
5476 write_attr(attr, out);
5477 out.push_str(") [");
5478 write_inline_list(text, out);
5479 out.push_str(" ] ( ");
5480 write_haskell_string(url, out);
5481 out.push_str(" , ");
5482 write_haskell_string(title, out);
5483 out.push_str(" )");
5484 }
5485 Inline::Image(attr, alt, url, title) => {
5486 out.push_str("Image (");
5487 write_attr(attr, out);
5488 out.push_str(") [");
5489 write_inline_list(alt, out);
5490 out.push_str(" ] ( ");
5491 write_haskell_string(url, out);
5492 out.push_str(" , ");
5493 write_haskell_string(title, out);
5494 out.push_str(" )");
5495 }
5496 Inline::Math(kind, content) => {
5497 out.push_str("Math ");
5498 out.push_str(kind);
5499 out.push(' ');
5500 write_haskell_string(content, out);
5501 }
5502 Inline::Span(attr, children) => {
5503 out.push_str("Span (");
5504 write_attr(attr, out);
5505 out.push_str(") [");
5506 write_inline_list(children, out);
5507 out.push_str(" ]");
5508 }
5509 Inline::RawInline(format, content) => {
5510 out.push_str("RawInline ( Format ");
5511 write_haskell_string(format, out);
5512 out.push_str(" ) ");
5513 write_haskell_string(content, out);
5514 }
5515 Inline::Quoted(kind, children) => {
5516 out.push_str("Quoted ");
5517 out.push_str(kind);
5518 out.push_str(" [");
5519 write_inline_list(children, out);
5520 out.push_str(" ]");
5521 }
5522 Inline::Note(blocks) => {
5523 out.push_str("Note [");
5524 write_block_list(blocks, out);
5525 out.push_str(" ]");
5526 }
5527 Inline::Cite(citations, text) => {
5528 out.push_str("Cite [");
5529 for (i, c) in citations.iter().enumerate() {
5530 if i > 0 {
5531 out.push(',');
5532 }
5533 out.push_str(" Citation { citationId = ");
5534 write_haskell_string(&c.id, out);
5535 out.push_str(" , citationPrefix = [");
5536 write_inline_list(&c.prefix, out);
5537 out.push_str(" ] , citationSuffix = [");
5538 write_inline_list(&c.suffix, out);
5539 out.push_str(" ] , citationMode = ");
5540 out.push_str(match c.mode {
5541 CitationMode::AuthorInText => "AuthorInText",
5542 CitationMode::NormalCitation => "NormalCitation",
5543 CitationMode::SuppressAuthor => "SuppressAuthor",
5544 });
5545 out.push_str(&format!(
5546 " , citationNoteNum = {} , citationHash = {} }}",
5547 c.note_num, c.hash
5548 ));
5549 }
5550 out.push_str(" ] [");
5551 write_inline_list(text, out);
5552 out.push_str(" ]");
5553 }
5554 Inline::Unsupported(name) => {
5555 out.push_str(&format!("Unsupported {name:?}"));
5556 }
5557 }
5558}
5559
5560fn write_attr(attr: &Attr, out: &mut String) {
5561 out.push(' ');
5562 write_haskell_string(&attr.id, out);
5563 out.push_str(" , [");
5564 for (i, c) in attr.classes.iter().enumerate() {
5565 if i > 0 {
5566 out.push(',');
5567 }
5568 out.push(' ');
5569 write_haskell_string(c, out);
5570 }
5571 if !attr.classes.is_empty() {
5572 out.push(' ');
5573 }
5574 out.push_str("] , [");
5575 for (i, (k, v)) in attr.kvs.iter().enumerate() {
5576 if i > 0 {
5577 out.push(',');
5578 }
5579 out.push_str(" ( ");
5580 write_haskell_string(k, out);
5581 out.push_str(" , ");
5582 write_haskell_string(v, out);
5583 out.push_str(" )");
5584 }
5585 if !attr.kvs.is_empty() {
5586 out.push(' ');
5587 }
5588 out.push_str("] ");
5589}
5590
/// Appends `s` to `out` as a double-quoted Haskell string literal, escaped
/// the way Haskell's `show` renders a `String`.
///
/// * `"` and `\` are backslash-escaped.
/// * Printable ASCII (0x20..=0x7E) is copied verbatim.
/// * ASCII control characters use Haskell's mnemonic escapes (`\n`, `\t`,
///   `\a`, `\NUL`, `\SOH`, ...) and 0x7F becomes `\DEL`.
/// * Every code point above 0x7F becomes a decimal escape such as `\233`.
/// * `\&` (the empty escape) is inserted where the literal would otherwise be
///   read back differently: between a decimal escape and a following digit,
///   and between `\SO` and a following `H` (which would read as `\SOH`).
fn write_haskell_string(s: &str, out: &mut String) {
    // Escape bodies (without the leading backslash) for code points
    // 0x00..=0x1F, indexed by code point.
    const CONTROL_ESCAPES: [&str; 32] = [
        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "a", "b", "t", "n", "v", "f", "r", "SO",
        "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC",
        "FS", "GS", "RS", "US",
    ];

    out.push('"');
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\u{7f}' => out.push_str("\\DEL"),
            '\u{0e}' => {
                out.push_str("\\SO");
                // "\SO" directly followed by 'H' would be parsed as "\SOH".
                if chars.peek() == Some(&'H') {
                    out.push_str("\\&");
                }
            }
            c if c < ' ' => {
                out.push('\\');
                out.push_str(CONTROL_ESCAPES[c as usize]);
            }
            c if c.is_ascii() => out.push(c),
            c => {
                out.push('\\');
                out.push_str(&(c as u32).to_string());
                // A digit right after a decimal escape would extend the number.
                if matches!(chars.peek(), Some(next) if next.is_ascii_digit()) {
                    out.push_str("\\&");
                }
            }
        }
    }
    out.push('"');
}
5637
5638fn attr_to_json(attr: &Attr) -> Value {
5646 let kvs: Vec<Value> = attr.kvs.iter().map(|(k, v)| json!([k, v])).collect();
5647 json!([attr.id, attr.classes, kvs])
5648}
5649
5650fn target_to_json(url: &str, title: &str) -> Value {
5651 json!([url, title])
5652}
5653
5654fn inlines_to_json(inlines: &[Inline]) -> Vec<Value> {
5655 inlines.iter().map(inline_to_json).collect()
5656}
5657
5658fn blocks_to_json(blocks: &[Block]) -> Vec<Value> {
5659 blocks.iter().map(block_to_json).collect()
5660}
5661
5662fn citation_to_json(c: &Citation) -> Value {
5663 let mode = match c.mode {
5664 CitationMode::AuthorInText => "AuthorInText",
5665 CitationMode::NormalCitation => "NormalCitation",
5666 CitationMode::SuppressAuthor => "SuppressAuthor",
5667 };
5668 json!({
5669 "citationId": c.id,
5670 "citationPrefix": inlines_to_json(&c.prefix),
5671 "citationSuffix": inlines_to_json(&c.suffix),
5672 "citationMode": { "t": mode },
5673 "citationNoteNum": c.note_num,
5674 "citationHash": c.hash,
5675 })
5676}
5677
5678fn inline_to_json(inline: &Inline) -> Value {
5679 match inline {
5680 Inline::Str(s) => json!({ "t": "Str", "c": s }),
5681 Inline::Space => json!({ "t": "Space" }),
5682 Inline::SoftBreak => json!({ "t": "SoftBreak" }),
5683 Inline::LineBreak => json!({ "t": "LineBreak" }),
5684 Inline::Emph(children) => json!({ "t": "Emph", "c": inlines_to_json(children) }),
5685 Inline::Strong(children) => json!({ "t": "Strong", "c": inlines_to_json(children) }),
5686 Inline::Strikeout(children) => {
5687 json!({ "t": "Strikeout", "c": inlines_to_json(children) })
5688 }
5689 Inline::Superscript(children) => {
5690 json!({ "t": "Superscript", "c": inlines_to_json(children) })
5691 }
5692 Inline::Subscript(children) => {
5693 json!({ "t": "Subscript", "c": inlines_to_json(children) })
5694 }
5695 Inline::Code(attr, content) => {
5696 json!({ "t": "Code", "c": [attr_to_json(attr), content] })
5697 }
5698 Inline::Link(attr, text, url, title) => json!({
5699 "t": "Link",
5700 "c": [attr_to_json(attr), inlines_to_json(text), target_to_json(url, title)],
5701 }),
5702 Inline::Image(attr, alt, url, title) => json!({
5703 "t": "Image",
5704 "c": [attr_to_json(attr), inlines_to_json(alt), target_to_json(url, title)],
5705 }),
5706 Inline::Math(kind, content) => json!({
5707 "t": "Math",
5708 "c": [{ "t": kind }, content],
5709 }),
5710 Inline::Span(attr, children) => json!({
5711 "t": "Span",
5712 "c": [attr_to_json(attr), inlines_to_json(children)],
5713 }),
5714 Inline::RawInline(format, content) => json!({
5715 "t": "RawInline",
5716 "c": [format, content],
5717 }),
5718 Inline::Quoted(kind, children) => json!({
5719 "t": "Quoted",
5720 "c": [{ "t": kind }, inlines_to_json(children)],
5721 }),
5722 Inline::Note(blocks) => json!({ "t": "Note", "c": blocks_to_json(blocks) }),
5723 Inline::Cite(citations, text) => json!({
5724 "t": "Cite",
5725 "c": [
5726 citations.iter().map(citation_to_json).collect::<Vec<_>>(),
5727 inlines_to_json(text),
5728 ],
5729 }),
5730 Inline::Unsupported(name) => json!({ "t": "Unsupported", "c": name }),
5731 }
5732}
5733
5734fn block_to_json(b: &Block) -> Value {
5735 match b {
5736 Block::Para(inlines) => json!({ "t": "Para", "c": inlines_to_json(inlines) }),
5737 Block::Plain(inlines) => json!({ "t": "Plain", "c": inlines_to_json(inlines) }),
5738 Block::Header(level, attr, inlines) => json!({
5739 "t": "Header",
5740 "c": [level, attr_to_json(attr), inlines_to_json(inlines)],
5741 }),
5742 Block::BlockQuote(blocks) => {
5743 json!({ "t": "BlockQuote", "c": blocks_to_json(blocks) })
5744 }
5745 Block::CodeBlock(attr, content) => json!({
5746 "t": "CodeBlock",
5747 "c": [attr_to_json(attr), content],
5748 }),
5749 Block::HorizontalRule => json!({ "t": "HorizontalRule" }),
5750 Block::BulletList(items) => {
5751 let items_json: Vec<Vec<Value>> = items.iter().map(|it| blocks_to_json(it)).collect();
5752 json!({ "t": "BulletList", "c": items_json })
5753 }
5754 Block::OrderedList(start, style, delim, items) => {
5755 let items_json: Vec<Vec<Value>> = items.iter().map(|it| blocks_to_json(it)).collect();
5756 json!({
5757 "t": "OrderedList",
5758 "c": [
5759 [json!(start), json!({ "t": style }), json!({ "t": delim })],
5760 items_json,
5761 ],
5762 })
5763 }
5764 Block::RawBlock(format, content) => json!({
5765 "t": "RawBlock",
5766 "c": [format, content],
5767 }),
5768 Block::Table(data) => table_to_json(data),
5769 Block::Div(attr, blocks) => json!({
5770 "t": "Div",
5771 "c": [attr_to_json(attr), blocks_to_json(blocks)],
5772 }),
5773 Block::LineBlock(lines) => {
5774 let lines_json: Vec<Vec<Value>> =
5775 lines.iter().map(|line| inlines_to_json(line)).collect();
5776 json!({ "t": "LineBlock", "c": lines_json })
5777 }
5778 Block::DefinitionList(items) => {
5779 let items_json: Vec<Value> = items
5780 .iter()
5781 .map(|(term, defs)| {
5782 let defs_json: Vec<Vec<Value>> =
5783 defs.iter().map(|d| blocks_to_json(d)).collect();
5784 json!([inlines_to_json(term), defs_json])
5785 })
5786 .collect();
5787 json!({ "t": "DefinitionList", "c": items_json })
5788 }
5789 Block::Figure(attr, caption, body) => {
5790 let caption_json = json!([Value::Null, blocks_to_json(caption)]);
5793 json!({
5794 "t": "Figure",
5795 "c": [attr_to_json(attr), caption_json, blocks_to_json(body)],
5796 })
5797 }
5798 Block::Unsupported(name) => json!({ "t": "Unsupported", "c": name }),
5799 }
5800}
5801
5802fn table_to_json(data: &TableData) -> Value {
5803 let caption_blocks: Vec<Value> = if data.caption.is_empty() {
5805 Vec::new()
5806 } else {
5807 vec![json!({ "t": "Plain", "c": inlines_to_json(&data.caption) })]
5808 };
5809 let caption_json = json!([Value::Null, caption_blocks]);
5810
5811 let colspecs: Vec<Value> = data
5814 .aligns
5815 .iter()
5816 .enumerate()
5817 .map(|(i, align)| {
5818 let width = data.widths.get(i).copied().unwrap_or(None);
5819 let width_json = match width {
5820 None => json!({ "t": "ColWidthDefault" }),
5821 Some(w) => json!({ "t": "ColWidth", "c": w }),
5822 };
5823 json!([{ "t": align }, width_json])
5824 })
5825 .collect();
5826
5827 let empty_attr = json!(["", Vec::<Value>::new(), Vec::<Value>::new()]);
5828
5829 let head_rows: Vec<Value> = data
5830 .head_rows
5831 .iter()
5832 .map(|r| table_row_to_json(r))
5833 .collect();
5834 let body_rows: Vec<Value> = data
5835 .body_rows
5836 .iter()
5837 .map(|r| table_row_to_json(r))
5838 .collect();
5839 let foot_rows: Vec<Value> = data
5840 .foot_rows
5841 .iter()
5842 .map(|r| table_row_to_json(r))
5843 .collect();
5844
5845 let table_head = json!([empty_attr, head_rows]);
5846 let table_bodies = json!([[empty_attr, 0, Vec::<Value>::new(), body_rows,]]);
5847 let table_foot = json!([empty_attr, foot_rows]);
5848
5849 json!({
5850 "t": "Table",
5851 "c": [
5852 attr_to_json(&data.attr),
5853 caption_json,
5854 colspecs,
5855 table_head,
5856 table_bodies,
5857 table_foot,
5858 ],
5859 })
5860}
5861
5862fn table_row_to_json(cells: &[GridCell]) -> Value {
5863 let empty_attr = json!(["", Vec::<Value>::new(), Vec::<Value>::new()]);
5864 let cells_json: Vec<Value> = cells
5865 .iter()
5866 .map(|cell| {
5867 json!([
5868 empty_attr,
5869 { "t": "AlignDefault" },
5870 cell.row_span,
5871 cell.col_span,
5872 blocks_to_json(&cell.blocks),
5873 ])
5874 })
5875 .collect();
5876 json!([empty_attr, cells_json])
5877}
5878
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse;
    use serde_json::Value;

    /// Parses `input`, renders it with `to_pandoc_json`, and decodes the
    /// rendered text back into a JSON value.
    fn parse_to_json(input: &str) -> Value {
        let rendered = to_pandoc_json(&parse(input, None));
        serde_json::from_str(&rendered).expect("to_pandoc_json must emit valid JSON")
    }

    #[test]
    fn empty_doc_emits_envelope_with_no_blocks() {
        let doc = parse_to_json("");
        // The envelope carries the API version, empty metadata and no blocks.
        assert_eq!(doc["pandoc-api-version"], serde_json::json!([1, 23, 1, 1]));
        assert_eq!(doc["meta"], serde_json::json!({}));
        assert_eq!(doc["blocks"], serde_json::json!([]));
    }

    #[test]
    fn paragraph_with_str_emits_para_str_shape() {
        let doc = parse_to_json("hello");
        let blocks = doc["blocks"].as_array().expect("blocks is array");
        assert_eq!(blocks.len(), 1);

        let para = &blocks[0];
        assert_eq!(para["t"], "Para");

        let inlines = para["c"].as_array().expect("Para.c is array");
        assert_eq!(inlines.len(), 1);
        let only = &inlines[0];
        assert_eq!(only["t"], "Str");
        assert_eq!(only["c"], "hello");
    }

    #[test]
    fn nullary_constructors_omit_c_key() {
        // "a b" yields a Space inline between the two words.
        let doc = parse_to_json("a b");
        let inlines = doc["blocks"][0]["c"].as_array().expect("Para.c is array");
        let space = inlines
            .iter()
            .find(|candidate| candidate["t"] == "Space")
            .expect("Space inline present");
        let space_obj = space.as_object().expect("Space is JSON object");
        assert!(
            space_obj.get("c").is_none(),
            "nullary constructors must omit the \"c\" key, got {space:?}",
        );
    }

    #[test]
    fn header_attr_shape_matches_pandoc_tuple() {
        let doc = parse_to_json("# Hi {#foo .bar key=val}");
        let header = &doc["blocks"][0];
        assert_eq!(header["t"], "Header");

        // Header payload is [level, attr, inlines].
        let c = header["c"].as_array().expect("Header.c is array");
        assert_eq!(c.len(), 3);
        assert_eq!(c[0], 1, "level");

        // The attribute is the triple [id, classes, key/value pairs].
        let attr = c[1].as_array().expect("attr tuple");
        assert_eq!(attr[0], "foo");
        assert_eq!(attr[1], serde_json::json!(["bar"]));
        assert_eq!(attr[2], serde_json::json!([["key", "val"]]));
    }
}