1use std::cell::RefCell;
19use std::collections::{HashMap, HashSet};
20
21use crate::SyntaxNode;
22use crate::syntax::SyntaxKind;
23use rowan::NodeOrToken;
24
/// Document-wide lookup tables gathered before rendering.
///
/// Built by `build_refs_ctx` and published through the `REFS_CTX`
/// thread-local for the duration of a `to_pandoc_ast` call.
#[derive(Default)]
struct RefsCtx {
    /// Reference definitions keyed by normalized label; the value is `(url, title)`.
    refs: HashMap<String, (String, String)>,
    /// Every heading identifier assigned while walking the document.
    heading_ids: HashSet<String>,
    /// Final heading identifier, keyed by the heading node's start offset.
    heading_id_by_offset: HashMap<u32, String>,
    /// Parsed footnote bodies, keyed by footnote label.
    footnotes: HashMap<String, Vec<Block>>,
    /// Example-list label (the marker text after `@`) mapped to its number.
    example_label_to_num: HashMap<String, usize>,
    /// Number given to the first item of each example list, keyed by the
    /// list node's start offset.
    example_list_start_by_offset: HashMap<u32, usize>,
    /// Note number recorded for each citation, keyed by the citation node's
    /// start offset.
    cite_note_num_by_offset: HashMap<u32, i64>,
}
53
thread_local! {
    /// Per-thread `RefsCtx`. `to_pandoc_ast` installs a freshly built context
    /// before rendering and resets it to the default afterwards; the lookup
    /// helpers in this file read it through `REFS_CTX.with`.
    static REFS_CTX: RefCell<RefsCtx> = RefCell::new(RefsCtx::default());
}
57
58pub fn to_pandoc_ast(tree: &SyntaxNode) -> String {
66 let ctx = build_refs_ctx(tree);
67 REFS_CTX.with(|c| *c.borrow_mut() = ctx);
68 let blocks = blocks_from_doc(tree);
69 let mut out = String::new();
70 out.push('[');
71 for (i, b) in blocks.iter().enumerate() {
72 if i > 0 {
73 out.push(',');
74 }
75 out.push(' ');
76 write_block(b, &mut out);
77 }
78 out.push_str(" ]");
79 REFS_CTX.with(|c| *c.borrow_mut() = RefsCtx::default());
80 out
81}
82
83fn build_refs_ctx(tree: &SyntaxNode) -> RefsCtx {
84 let mut ctx = RefsCtx::default();
85 collect_cite_note_nums(tree, &mut ctx);
89 let mut example_counter: usize = 0;
93 collect_example_numbering(tree, &mut ctx, &mut example_counter);
94 REFS_CTX.with(|c| {
99 let mut borrowed = c.borrow_mut();
100 borrowed.cite_note_num_by_offset = ctx.cite_note_num_by_offset.clone();
101 borrowed.example_label_to_num = ctx.example_label_to_num.clone();
102 borrowed.example_list_start_by_offset = ctx.example_list_start_by_offset.clone();
103 });
104 let mut seen_ids: HashMap<String, u32> = HashMap::new();
105 collect_refs_and_headings(tree, &mut ctx, &mut seen_ids);
106 ctx
107}
108
109fn collect_cite_note_nums(tree: &SyntaxNode, ctx: &mut RefsCtx) {
115 let mut footnote_def_nodes: HashMap<String, SyntaxNode> = HashMap::new();
116 for child in tree.descendants() {
117 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION
118 && let Some(label) = footnote_label(&child)
119 {
120 footnote_def_nodes.entry(label).or_insert(child);
121 }
122 }
123 let mut counter: i64 = 0;
124 for child in tree.children() {
125 if child.kind() == SyntaxKind::FOOTNOTE_DEFINITION {
126 continue;
127 }
128 visit_for_cite_nums(&child, &footnote_def_nodes, &mut counter, None, ctx);
129 }
130}
131
132fn visit_for_cite_nums(
133 node: &SyntaxNode,
134 fn_defs: &HashMap<String, SyntaxNode>,
135 counter: &mut i64,
136 in_fn: Option<i64>,
137 ctx: &mut RefsCtx,
138) {
139 for el in node.children_with_tokens() {
140 if let NodeOrToken::Node(n) = el {
141 match n.kind() {
142 SyntaxKind::CITATION => {
143 let offset: u32 = n.text_range().start().into();
144 let num = if let Some(fn_num) = in_fn {
145 fn_num
146 } else {
147 *counter += 1;
148 *counter
149 };
150 ctx.cite_note_num_by_offset.insert(offset, num);
151 }
152 SyntaxKind::FOOTNOTE_REFERENCE => {
153 if in_fn.is_none() {
154 *counter += 1;
155 let fn_num = *counter;
156 if let Some(label) = footnote_label(&n)
157 && let Some(def) = fn_defs.get(&label)
158 {
159 visit_for_cite_nums(def, fn_defs, counter, Some(fn_num), ctx);
160 }
161 }
162 }
163 _ => visit_for_cite_nums(&n, fn_defs, counter, in_fn, ctx),
164 }
165 }
166 }
167}
168
169fn collect_example_numbering(node: &SyntaxNode, ctx: &mut RefsCtx, counter: &mut usize) {
175 for child in node.children() {
176 if child.kind() == SyntaxKind::LIST && list_is_example(&child) {
177 let list_offset: u32 = child.text_range().start().into();
178 ctx.example_list_start_by_offset
179 .insert(list_offset, *counter + 1);
180 for item in child
181 .children()
182 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
183 {
184 *counter += 1;
185 if let Some(label) = example_item_label(&item) {
186 ctx.example_label_to_num.entry(label).or_insert(*counter);
187 }
188 }
189 collect_example_numbering(&child, ctx, counter);
192 } else {
193 collect_example_numbering(&child, ctx, counter);
194 }
195 }
196}
197
198fn list_is_example(list: &SyntaxNode) -> bool {
202 let Some(item) = list.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
203 return false;
204 };
205 let marker = list_item_marker_text(&item);
206 let trimmed = marker.trim();
207 let body = if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
208 inner
209 } else if let Some(inner) = trimmed.strip_suffix(')') {
210 inner
211 } else if let Some(inner) = trimmed.strip_suffix('.') {
212 inner
213 } else {
214 trimmed
215 };
216 body.starts_with('@')
217 && body[1..]
218 .chars()
219 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
220}
221
222fn list_item_marker_text(item: &SyntaxNode) -> String {
223 item.children_with_tokens()
224 .filter_map(|el| el.into_token())
225 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
226 .map(|t| t.text().to_string())
227 .unwrap_or_default()
228}
229
230fn example_item_label(item: &SyntaxNode) -> Option<String> {
233 let marker = list_item_marker_text(item);
234 let trimmed = marker.trim();
235 let body = trimmed
236 .strip_prefix('(')
237 .and_then(|s| s.strip_suffix(')'))
238 .or_else(|| trimmed.strip_suffix(')'))
239 .or_else(|| trimmed.strip_suffix('.'))
240 .unwrap_or(trimmed);
241 let label = body.strip_prefix('@')?;
242 if label.is_empty() {
243 None
244 } else {
245 Some(label.to_string())
246 }
247}
248
249fn collect_refs_and_headings(
250 node: &SyntaxNode,
251 ctx: &mut RefsCtx,
252 seen_ids: &mut HashMap<String, u32>,
253) {
254 for child in node.children() {
255 match child.kind() {
256 SyntaxKind::REFERENCE_DEFINITION => {
257 if let Some((label, url, title)) = parse_reference_def(&child) {
258 ctx.refs
259 .entry(normalize_ref_label(&label))
260 .or_insert((url, title));
261 }
262 }
263 SyntaxKind::FOOTNOTE_DEFINITION => {
264 if let Some((label, blocks)) = parse_footnote_def(&child) {
265 ctx.footnotes.entry(label).or_insert(blocks);
266 }
267 }
268 SyntaxKind::HEADING => {
269 let (id, was_explicit) = heading_id_with_explicitness(&child);
270 let final_id = if was_explicit {
271 seen_ids.entry(id.clone()).or_insert(0);
274 id
275 } else {
276 let mut base = id;
277 if base.is_empty() {
278 base = "section".to_string();
279 }
280 let count = seen_ids.entry(base.clone()).or_insert(0);
281 let id = if *count == 0 {
282 base
283 } else {
284 format!("{base}-{count}")
285 };
286 *count += 1;
287 id
288 };
289 if !final_id.is_empty() {
290 let offset: u32 = child.text_range().start().into();
291 ctx.heading_ids.insert(final_id.clone());
292 ctx.heading_id_by_offset.insert(offset, final_id);
293 }
294 collect_refs_and_headings(&child, ctx, seen_ids);
295 }
296 _ => collect_refs_and_headings(&child, ctx, seen_ids),
297 }
298 }
299}
300
301fn heading_id_with_explicitness(node: &SyntaxNode) -> (String, bool) {
305 let inlines = node
306 .children()
307 .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
308 .map(|c| coalesce_inlines(inlines_from(&c)))
309 .unwrap_or_default();
310 let attr = node.children_with_tokens().find_map(|el| match el {
311 NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
312 NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => Some(t.text().to_string()),
313 _ => None,
314 });
315 if let Some(raw) = attr {
316 let trimmed = raw.trim();
317 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
318 let parsed = parse_attr_block(inner);
319 if !parsed.id.is_empty() {
320 return (parsed.id, true);
321 }
322 }
323 }
324 (pandoc_slugify(&inlines_to_plaintext(&inlines)), false)
325}
326
327fn parse_footnote_def(node: &SyntaxNode) -> Option<(String, Vec<Block>)> {
328 let label = footnote_label(node)?;
329 let mut blocks = Vec::new();
330 for child in node.children() {
331 if child.kind() == SyntaxKind::CODE_BLOCK
338 && !child
339 .children()
340 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
341 {
342 blocks.push(indented_code_block_with_extra_strip(&child, 4));
343 } else {
344 collect_block(&child, &mut blocks);
345 }
346 }
347 Some((label, blocks))
348}
349
350fn indented_code_block_with_extra_strip(node: &SyntaxNode, extra: usize) -> Block {
351 let raw_format = code_block_raw_format(node);
352 let attr = code_block_attr(node);
353 let is_fenced = node
354 .children()
355 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
356 let mut content = String::new();
357 for child in node.children() {
358 if child.kind() == SyntaxKind::CODE_CONTENT {
359 content.push_str(&child.text().to_string());
360 }
361 }
362 while content.ends_with('\n') {
363 content.pop();
364 }
365 content = content
370 .split('\n')
371 .map(expand_tabs_to_4)
372 .collect::<Vec<_>>()
373 .join("\n");
374 content = strip_leading_spaces_per_line(&content, extra);
375 if !is_fenced {
376 content = strip_indented_code_indent(&content);
377 }
378 if let Some(fmt) = raw_format {
379 return Block::RawBlock(fmt, content);
380 }
381 Block::CodeBlock(attr, content)
382}
383
/// Removes at most `n` leading space characters from every `\n`-separated
/// line of `s`. Only spaces are removed; a tab or any other character stops
/// the stripping for that line.
fn strip_leading_spaces_per_line(s: &str, n: usize) -> String {
    s.split('\n')
        .map(|line| {
            // Spaces are single-byte, so the count is also a valid byte offset.
            let drop = line.bytes().take(n).take_while(|&b| b == b' ').count();
            &line[drop..]
        })
        .collect::<Vec<_>>()
        .join("\n")
}
395
396fn footnote_label(node: &SyntaxNode) -> Option<String> {
397 for el in node.children_with_tokens() {
398 if let NodeOrToken::Token(t) = el
399 && t.kind() == SyntaxKind::FOOTNOTE_LABEL_ID
400 {
401 return Some(t.text().to_string());
402 }
403 }
404 None
405}
406
407fn parse_reference_def(node: &SyntaxNode) -> Option<(String, String, String)> {
408 let link = node.children().find(|c| c.kind() == SyntaxKind::LINK)?;
409 let label_node = link
410 .children()
411 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)?;
412 let label = label_node.text().to_string();
413
414 let mut tail = String::new();
415 let mut after_link = false;
416 for el in node.children_with_tokens() {
417 if after_link {
418 match el {
419 NodeOrToken::Token(t) => tail.push_str(t.text()),
420 NodeOrToken::Node(n) => tail.push_str(&n.text().to_string()),
421 }
422 } else if let NodeOrToken::Node(n) = &el
423 && n.kind() == SyntaxKind::LINK
424 {
425 after_link = true;
426 }
427 }
428
429 let trimmed = tail.trim_start();
430 let rest = trimmed.strip_prefix(':')?;
431 let after_colon = rest.trim_start();
432 let (url, after_url) = parse_ref_url(after_colon);
433 let title = parse_dest_title(after_url.trim());
434 Some((unescape_label(&label), url, title))
435}
436
/// Splits the destination off the front of `s`.
///
/// Leading whitespace is skipped. A destination written as `<...>` ends at
/// the first `>` and is returned without the angle brackets; otherwise it
/// runs up to the first whitespace character. The second element is whatever
/// follows the destination.
fn parse_ref_url(s: &str) -> (String, &str) {
    let s = s.trim_start();
    let bracketed = s
        .strip_prefix('<')
        .and_then(|inner| inner.split_once('>'));
    if let Some((url, rest)) = bracketed {
        return (url.to_string(), rest);
    }
    let (url, rest) = s.split_at(s.find(char::is_whitespace).unwrap_or(s.len()));
    (url.to_string(), rest)
}
447
448fn unescape_label(label: &str) -> String {
449 let mut out = String::with_capacity(label.len());
450 let mut chars = label.chars().peekable();
451 while let Some(ch) = chars.next() {
452 if ch == '\\'
453 && let Some(&next) = chars.peek()
454 && is_ascii_punct(next)
455 {
456 out.push(next);
457 chars.next();
458 } else {
459 out.push(ch);
460 }
461 }
462 out
463}
464
/// Returns `true` when `c` is an ASCII punctuation character.
///
/// `char::is_ascii_punctuation` is already `false` for every non-ASCII
/// character, so no separate `is_ascii` check is needed.
fn is_ascii_punct(c: char) -> bool {
    c.is_ascii_punctuation()
}
468
469fn normalize_ref_label(label: &str) -> String {
472 let unescaped = unescape_label(label);
473 let mut out = String::new();
474 let mut last_space = false;
475 for ch in unescaped.chars() {
476 if ch.is_whitespace() {
477 if !out.is_empty() && !last_space {
478 out.push(' ');
479 last_space = true;
480 }
481 } else {
482 for lc in ch.to_lowercase() {
483 out.push(lc);
484 }
485 last_space = false;
486 }
487 }
488 if last_space {
489 out.pop();
490 }
491 out
492}
493
494fn lookup_ref(label: &str) -> Option<(String, String)> {
495 let key = normalize_ref_label(label);
496 REFS_CTX.with(|c| c.borrow().refs.get(&key).cloned())
497}
498
499fn lookup_heading_id(label: &str) -> Option<String> {
500 let id = pandoc_slugify(&unescape_label(label));
501 if id.is_empty() {
502 return None;
503 }
504 REFS_CTX.with(|c| {
505 if c.borrow().heading_ids.contains(&id) {
506 Some(id)
507 } else {
508 None
509 }
510 })
511}
512
/// Re-tokenizes `s` and joins the tokens with single spaces, so that two
/// renderings that differ only in whitespace layout compare equal.
///
/// Tokens are: each of `[ ] ( ) ,` on its own; a double-quoted string
/// (a backslash skips the following byte, and an unterminated string runs to
/// the end of input); and any other run of bytes up to the next whitespace,
/// bracket, comma or quote. Space, tab, `\n` and `\r` between tokens are
/// discarded; whitespace inside quoted strings is preserved.
pub fn normalize_native(s: &str) -> String {
    fn is_blank(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }
    fn is_punct(b: u8) -> bool {
        matches!(b, b'[' | b']' | b'(' | b')' | b',')
    }

    let raw = s.as_bytes();
    let len = raw.len();
    let mut pieces: Vec<&str> = Vec::new();
    let mut pos = 0usize;
    while pos < len {
        let lead = raw[pos];
        if is_blank(lead) {
            pos += 1;
            continue;
        }
        let begin = pos;
        if is_punct(lead) {
            pos += 1;
        } else if lead == b'"' {
            pos += 1;
            while pos < len {
                let inner = raw[pos];
                if inner == b'\\' && pos + 1 < len {
                    // Skip the escaped byte so an escaped quote does not end the string.
                    pos += 2;
                } else {
                    pos += 1;
                    if inner == b'"' {
                        break;
                    }
                }
            }
        } else {
            while pos < len && !(is_blank(raw[pos]) || is_punct(raw[pos]) || raw[pos] == b'"') {
                pos += 1;
            }
        }
        pieces.push(&s[begin..pos]);
    }
    pieces.join(" ")
}
570
/// Block-level element of the output tree; `to_pandoc_ast` serializes each
/// one with `write_block`.
///
/// Variant names mirror Pandoc's block constructors. The payload comments
/// below state only what this file shows being stored in each variant.
#[derive(Debug)]
#[allow(clippy::enum_variant_names)]
enum Block {
    /// Paragraph of inlines.
    Para(Vec<Inline>),
    /// Inlines without paragraph wrapping.
    Plain(Vec<Inline>),
    /// Heading: level, attributes, content.
    Header(usize, Attr, Vec<Inline>),
    /// Block quote containing nested blocks.
    BlockQuote(Vec<Block>),
    /// Code block: attributes and text.
    CodeBlock(Attr, String),
    HorizontalRule,
    /// One `Vec<Block>` per list item.
    BulletList(Vec<Vec<Block>>),
    // NOTE(review): presumably start number, number style and delimiter,
    // followed by the items — confirm against the list builder and writer.
    OrderedList(usize, &'static str, &'static str, Vec<Vec<Block>>),
    /// Raw block: format name (e.g. `"html"`, `"tex"`) and verbatim text.
    RawBlock(String, String),
    Table(TableData),
    /// Generic container: attributes and nested blocks.
    Div(Attr, Vec<Block>),
    /// One `Vec<Inline>` per line.
    LineBlock(Vec<Vec<Inline>>),
    // NOTE(review): presumably (term, definitions) pairs — confirm.
    DefinitionList(Vec<(Vec<Inline>, Vec<Vec<Block>>)>),
    /// Figure: attributes, caption blocks, body blocks (see `figure_block`).
    Figure(Attr, Vec<Block>, Vec<Block>),
    /// Fallback for syntax kinds with no mapping; holds the kind's `Debug` name.
    Unsupported(String),
}
597
/// Payload of `Block::Table`, produced by the table builders
/// (`pipe_table`, `simple_table`, `grid_table`, `multiline_table`).
#[derive(Debug)]
struct TableData {
    /// Attributes attached to the table.
    attr: Attr,
    /// Caption inlines.
    caption: Vec<Inline>,
    // NOTE(review): presumably one alignment name per column — confirm.
    aligns: Vec<&'static str>,
    // NOTE(review): presumably one optional relative width per column — confirm.
    widths: Vec<Option<f64>>,
    /// Header rows, each a list of cells.
    head_rows: Vec<Vec<GridCell>>,
    /// Body rows, each a list of cells.
    body_rows: Vec<Vec<GridCell>>,
    /// Footer rows, each a list of cells.
    foot_rows: Vec<Vec<GridCell>>,
}
614
/// A single table cell together with its row and column span.
#[derive(Debug)]
struct GridCell {
    /// Number of rows the cell covers.
    row_span: u32,
    /// Number of columns the cell covers.
    col_span: u32,
    /// Cell content.
    blocks: Vec<Block>,
}

impl GridCell {
    /// Builds a cell that covers exactly one row and one column.
    fn no_span(blocks: Vec<Block>) -> Self {
        Self {
            row_span: 1,
            col_span: 1,
            blocks,
        }
    }
}
634
/// Inline-level element of the output tree.
///
/// Variant names mirror Pandoc's inline constructors. The payload comments
/// below state only what this file shows; the rest is hedged.
#[derive(Debug)]
#[allow(clippy::enum_variant_names)]
enum Inline {
    Str(String),
    Space,
    SoftBreak,
    LineBreak,
    Emph(Vec<Inline>),
    Strong(Vec<Inline>),
    Strikeout(Vec<Inline>),
    Superscript(Vec<Inline>),
    Subscript(Vec<Inline>),
    /// Inline code: attributes and text.
    Code(Attr, String),
    // NOTE(review): presumably attributes, link text, URL, title — confirm.
    Link(Attr, Vec<Inline>, String, String),
    /// Image: attributes, alt inlines, URL, title (as destructured in `figure_block`).
    Image(Attr, Vec<Inline>, String, String),
    // NOTE(review): the first field presumably names the math kind — confirm.
    Math(&'static str, String),
    Span(Attr, Vec<Inline>),
    // NOTE(review): presumably format name and verbatim text — confirm.
    RawInline(String, String),
    // NOTE(review): the first field presumably names the quote kind — confirm.
    Quoted(&'static str, Vec<Inline>),
    /// Footnote body.
    Note(Vec<Block>),
    Cite(Vec<Citation>, Vec<Inline>),
    // NOTE(review): presumably holds a description of the unmapped construct — confirm.
    Unsupported(String),
}
658
/// One entry of an `Inline::Cite`.
#[derive(Debug)]
struct Citation {
    /// Citation key.
    id: String,
    /// Inlines preceding the key.
    prefix: Vec<Inline>,
    /// Inlines following the key.
    suffix: Vec<Inline>,
    /// How the citation is to be presented.
    mode: CitationMode,
    // NOTE(review): presumably filled from `RefsCtx::cite_note_num_by_offset`
    // — confirm at the construction site.
    note_num: i64,
    // NOTE(review): meaning not visible in this part of the file.
    hash: i64,
}
668
/// Presentation mode of a `Citation`; the variant names mirror Pandoc's
/// citation-mode constructors.
#[derive(Debug, Clone, Copy)]
enum CitationMode {
    AuthorInText,
    NormalCitation,
    SuppressAuthor,
}
675
/// Attribute triple attached to headings, code blocks, divs, images, etc.
/// The default value has an empty id, no classes and no key/value pairs.
#[derive(Debug, Default, Clone)]
struct Attr {
    /// Identifier; empty when none was given.
    id: String,
    /// Class names in source order.
    classes: Vec<String>,
    /// Key/value pairs in source order.
    kvs: Vec<(String, String)>,
}
682
683fn blocks_from_doc(doc: &SyntaxNode) -> Vec<Block> {
686 let mut out = Vec::new();
687 for child in doc.children() {
688 collect_block(&child, &mut out);
689 }
690 out
691}
692
693fn block_from(node: &SyntaxNode) -> Option<Block> {
694 match node.kind() {
695 SyntaxKind::PARAGRAPH => Some(Block::Para(coalesce_inlines(inlines_from(node)))),
696 SyntaxKind::PLAIN => Some(Block::Plain(coalesce_inlines(inlines_from(node)))),
697 SyntaxKind::HEADING => Some(heading_block(node)),
698 SyntaxKind::BLOCK_QUOTE => Some(Block::BlockQuote(blockquote_blocks(node))),
699 SyntaxKind::CODE_BLOCK => Some(code_block(node)),
700 SyntaxKind::HORIZONTAL_RULE => Some(Block::HorizontalRule),
701 SyntaxKind::LIST => Some(list_block(node)),
702 SyntaxKind::BLANK_LINE => None,
703 SyntaxKind::REFERENCE_DEFINITION => None,
706 SyntaxKind::FOOTNOTE_DEFINITION => None,
709 SyntaxKind::YAML_METADATA => None,
712 SyntaxKind::PANDOC_TITLE_BLOCK => None,
715 SyntaxKind::HTML_BLOCK => Some(html_block(node)),
716 SyntaxKind::PIPE_TABLE => pipe_table(node).map(Block::Table),
717 SyntaxKind::SIMPLE_TABLE => simple_table(node).map(Block::Table),
718 SyntaxKind::GRID_TABLE => grid_table(node).map(Block::Table),
719 SyntaxKind::MULTILINE_TABLE => multiline_table(node).map(Block::Table),
720 SyntaxKind::TEX_BLOCK => Some(tex_block(node)),
721 SyntaxKind::FENCED_DIV => Some(fenced_div(node)),
722 SyntaxKind::LINE_BLOCK => Some(line_block(node)),
723 SyntaxKind::DEFINITION_LIST => Some(definition_list(node)),
724 SyntaxKind::FIGURE => Some(figure_block(node)),
725 other => Some(Block::Unsupported(format!("{other:?}"))),
726 }
727}
728
729fn figure_block(node: &SyntaxNode) -> Block {
736 let mut alt: Vec<Inline> = Vec::new();
737 let mut image_inline: Option<Inline> = None;
738 if let Some(image) = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_LINK) {
739 let alt_node = image.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
740 if let Some(an) = alt_node {
741 alt = coalesce_inlines(inlines_from(&an));
742 }
743 let mut tmp = Vec::new();
744 render_image_inline(&image, &mut tmp);
745 if let Some(first) = tmp.into_iter().next() {
746 image_inline = Some(first);
747 }
748 }
749 let (figure_attr, image_inline) = match image_inline {
752 Some(Inline::Image(mut attr, alt_inlines, url, title)) if !attr.id.is_empty() => {
753 let fig_attr = Attr::with_id(std::mem::take(&mut attr.id));
754 (fig_attr, Some(Inline::Image(attr, alt_inlines, url, title)))
755 }
756 other => (Attr::default(), other),
757 };
758 let caption = if alt.is_empty() {
759 Vec::new()
760 } else {
761 vec![Block::Plain(alt)]
762 };
763 let body = match image_inline {
764 Some(img) => vec![Block::Plain(vec![img])],
765 None => Vec::new(),
766 };
767 Block::Figure(figure_attr, caption, body)
768}
769
770fn heading_block(node: &SyntaxNode) -> Block {
771 let level = heading_level(node);
772 let inlines = node
773 .children()
774 .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
775 .map(|c| coalesce_inlines(inlines_from(&c)))
776 .unwrap_or_default();
777 let offset: u32 = node.text_range().start().into();
781 let final_id = REFS_CTX
782 .with(|c| c.borrow().heading_id_by_offset.get(&offset).cloned())
783 .unwrap_or_default();
784 let attr = node
785 .children_with_tokens()
786 .find_map(|el| match el {
787 NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
788 NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => {
789 Some(t.text().to_string())
790 }
791 _ => None,
792 })
793 .map(|raw| {
794 let trimmed = raw.trim();
795 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
796 let mut attr = parse_attr_block(inner);
797 if attr.id.is_empty() {
798 attr.id = final_id.clone();
799 }
800 attr
801 } else {
802 Attr::with_id(final_id.clone())
803 }
804 })
805 .unwrap_or_else(|| Attr::with_id(final_id));
806 Block::Header(level, attr, inlines)
807}
808
809fn heading_level(node: &SyntaxNode) -> usize {
810 for child in node.children() {
811 if child.kind() == SyntaxKind::ATX_HEADING_MARKER {
812 for tok in child.children_with_tokens() {
813 if let Some(t) = tok.as_token()
814 && t.kind() == SyntaxKind::ATX_HEADING_MARKER
815 {
816 return t.text().chars().filter(|&c| c == '#').count();
817 }
818 }
819 }
820 }
821 for el in node.descendants_with_tokens() {
822 if let NodeOrToken::Token(t) = el
823 && t.kind() == SyntaxKind::SETEXT_HEADING_UNDERLINE
824 {
825 return if t.text().trim_start().starts_with('=') {
826 1
827 } else {
828 2
829 };
830 }
831 }
832 1
833}
834
835fn blockquote_blocks(node: &SyntaxNode) -> Vec<Block> {
836 let mut out = Vec::new();
837 for child in node.children() {
838 collect_block(&child, &mut out);
839 }
840 out
841}
842
843fn code_block(node: &SyntaxNode) -> Block {
844 let raw_format = code_block_raw_format(node);
845 let attr = code_block_attr(node);
846 let is_fenced = node
847 .children()
848 .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);
849 let mut content = String::new();
850 for child in node.children() {
851 if child.kind() == SyntaxKind::CODE_CONTENT {
852 content.push_str(&child.text().to_string());
853 }
854 }
855 while content.ends_with('\n') {
857 content.pop();
858 }
859 if is_fenced {
860 content = content
865 .split('\n')
866 .map(expand_tabs_to_4)
867 .collect::<Vec<_>>()
868 .join("\n");
869 } else {
870 content = strip_indented_code_indent(&content);
871 }
872 if let Some(fmt) = raw_format {
873 return Block::RawBlock(fmt, content);
874 }
875 Block::CodeBlock(attr, content)
876}
877
878fn code_block_raw_format(node: &SyntaxNode) -> Option<String> {
883 let open = node
884 .children()
885 .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)?;
886 let info = open
887 .children()
888 .find(|c| c.kind() == SyntaxKind::CODE_INFO)?;
889 let raw = info.text().to_string();
890 let trimmed = raw.trim();
891 let inner = trimmed
892 .strip_prefix('{')
893 .and_then(|s| s.strip_suffix('}'))?;
894 let inner = inner.trim();
895 let format = inner.strip_prefix('=')?.trim();
896 if format.is_empty() || format.contains(char::is_whitespace) {
897 return None;
898 }
899 Some(format.to_string())
900}
901
902fn code_block_attr(node: &SyntaxNode) -> Attr {
903 let Some(open) = node
904 .children()
905 .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
906 else {
907 return Attr::default();
908 };
909 let Some(info) = open.children().find(|c| c.kind() == SyntaxKind::CODE_INFO) else {
910 return Attr::default();
911 };
912 let raw = info.text().to_string();
913 let trimmed = raw.trim();
914 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
915 return parse_attr_block(inner);
916 }
917 if let Some(brace) = trimmed.find('{')
920 && trimmed.ends_with('}')
921 {
922 let lang = trimmed[..brace].trim();
923 let attr_inner = &trimmed[brace + 1..trimmed.len() - 1];
924 let mut attr = parse_attr_block(attr_inner);
925 if !lang.is_empty() {
926 attr.classes.insert(0, normalize_lang_id(lang));
927 }
928 return attr;
929 }
930 if !trimmed.is_empty() {
931 return Attr {
932 id: String::new(),
933 classes: vec![normalize_lang_id(trimmed)],
934 kvs: Vec::new(),
935 };
936 }
937 Attr::default()
938}
939
/// Normalizes a fence language name: ASCII letters are lowercased, and the
/// spellings `c++` and `objective-c` (in any ASCII case) map to `cpp` and
/// `objectivec`.
fn normalize_lang_id(lang: &str) -> String {
    if lang.eq_ignore_ascii_case("c++") {
        "cpp".to_string()
    } else if lang.eq_ignore_ascii_case("objective-c") {
        "objectivec".to_string()
    } else {
        lang.to_ascii_lowercase()
    }
}
951
952fn strip_indented_code_indent(s: &str) -> String {
956 let mut out = String::with_capacity(s.len());
957 for (i, line) in s.split('\n').enumerate() {
958 if i > 0 {
959 out.push('\n');
960 }
961 let expanded = expand_tabs_to_4(line);
965 let stripped = if let Some(rest) = expanded.strip_prefix(" ") {
966 rest.to_string()
967 } else if let Some(rest) = expanded.strip_prefix('\t') {
968 rest.to_string()
969 } else {
970 expanded
974 };
975 out.push_str(&stripped);
976 }
977 out
978}
979
/// Replaces every tab in `line` with the spaces needed to reach the next
/// multiple-of-four column. Columns are counted in characters from the start
/// of `line`.
fn expand_tabs_to_4(line: &str) -> String {
    let mut expanded = String::with_capacity(line.len());
    let mut column = 0usize;
    for ch in line.chars() {
        if ch != '\t' {
            expanded.push(ch);
            column += 1;
            continue;
        }
        // A tab always advances by at least one column.
        let pad = 4 - column % 4;
        expanded.extend(std::iter::repeat(' ').take(pad));
        column += pad;
    }
    expanded
}
1000
1001fn html_block(node: &SyntaxNode) -> Block {
1002 let mut content = node.text().to_string();
1003 while content.ends_with('\n') {
1004 content.pop();
1005 }
1006 if let Some(div) = try_div_html_block(&content) {
1007 return div;
1008 }
1009 Block::RawBlock("html".to_string(), content)
1010}
1011
1012fn emit_html_block(node: &SyntaxNode, out: &mut Vec<Block>) {
1020 let mut content = node.text().to_string();
1021 while content.ends_with('\n') {
1022 content.pop();
1023 }
1024 if let Some(div) = try_div_html_block(&content) {
1025 out.push(div);
1026 return;
1027 }
1028 let leading_ws = content
1029 .as_bytes()
1030 .iter()
1031 .position(|&b| b != b' ' && b != b'\t')
1032 .unwrap_or(content.len());
1033 let trimmed = &content[leading_ws..];
1034 if trimmed.starts_with("<!--")
1035 || trimmed.starts_with("<?")
1036 || trimmed.starts_with("<![CDATA[")
1037 || trimmed.starts_with("<!")
1038 || is_raw_text_element_open(trimmed)
1039 {
1040 out.push(Block::RawBlock("html".to_string(), content));
1041 return;
1042 }
1043 if !content.contains('\n') {
1044 out.push(Block::RawBlock("html".to_string(), content));
1045 return;
1046 }
1047 for line in content.split('\n') {
1048 let line_trimmed = line.trim();
1049 if line_trimmed.is_empty() {
1050 continue;
1051 }
1052 if is_complete_html_tag_line(line_trimmed) {
1053 out.push(Block::RawBlock(
1054 "html".to_string(),
1055 line_trimmed.to_string(),
1056 ));
1057 } else {
1058 let inlines = coalesce_inlines(parse_cell_text_inlines(line_trimmed));
1059 if !inlines.is_empty() {
1060 out.push(Block::Plain(inlines));
1061 }
1062 }
1063 }
1064}
1065
/// Returns `true` when `s` starts with the opening tag of a `script`,
/// `style`, `pre` or `textarea` element (tag name compared ASCII
/// case-insensitively).
///
/// The tag name must be followed by end of input, a space, tab, newline, `>`
/// or `/`, so that e.g. `<pretty>` does not count as `<pre`.
///
/// The comparison is done on bytes: slicing the `&str` at the tag length
/// would panic whenever that offset falls inside a multi-byte character
/// (for example `<stylé>`).
fn is_raw_text_element_open(s: &str) -> bool {
    let Some(rest) = s.strip_prefix('<') else {
        return false;
    };
    let rest = rest.as_bytes();
    ["script", "style", "pre", "textarea"].iter().any(|tag| {
        let name_matches = rest
            .get(..tag.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(tag.as_bytes()));
        name_matches
            && matches!(
                rest.get(tag.len()).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'>' | b'/')
            )
    })
}
1093
/// Returns `true` when `s` consists of exactly one tag: it starts with `<`
/// and the first `>` that is not inside a quoted attribute value is the last
/// byte of `s`.
///
/// Both `"` and `'` open a quoted value that runs to the next matching quote;
/// an unterminated quote or a missing `>` yields `false`.
fn is_complete_html_tag_line(s: &str) -> bool {
    let Some(rest) = s.strip_prefix('<') else {
        return false;
    };
    let mut open_quote: Option<u8> = None;
    for (idx, byte) in rest.bytes().enumerate() {
        match open_quote {
            Some(quote) => {
                if byte == quote {
                    open_quote = None;
                }
            }
            None => match byte {
                b'>' => return idx + 1 == rest.len(),
                b'"' | b'\'' => open_quote = Some(byte),
                _ => {}
            },
        }
    }
    false
}
1131
1132fn collect_block(node: &SyntaxNode, out: &mut Vec<Block>) {
1136 if node.kind() == SyntaxKind::HTML_BLOCK {
1137 emit_html_block(node, out);
1138 return;
1139 }
1140 if let Some(b) = block_from(node) {
1141 out.push(b);
1142 }
1143}
1144
1145fn try_div_html_block(content: &str) -> Option<Block> {
1152 let bytes = content.as_bytes();
1153 let leading_ws = bytes
1154 .iter()
1155 .position(|&b| b != b' ' && b != b'\t')
1156 .unwrap_or(bytes.len());
1157 let head = &content[leading_ws..];
1158 let head_bytes = head.as_bytes();
1159 if head_bytes.len() < 4 || !head_bytes[..4].eq_ignore_ascii_case(b"<div") {
1160 return None;
1161 }
1162 let after_div = head_bytes.get(4).copied();
1163 match after_div {
1164 Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'>') | Some(b'/') => {}
1165 _ => return None,
1166 }
1167 let close_gt_rel = head[4..].find('>')?;
1168 let open_attrs_raw = &head[4..4 + close_gt_rel];
1169 let open_attrs = open_attrs_raw.trim_matches(|c: char| c.is_whitespace() || c == '/');
1170 let attr = parse_html_attrs(open_attrs);
1171 let after_open_tag = leading_ws + 4 + close_gt_rel + 1;
1172 let multiline = content.as_bytes().get(after_open_tag).copied() == Some(b'\n');
1173 let trailing_ws = content.as_bytes()[after_open_tag..]
1174 .iter()
1175 .rev()
1176 .position(|&b| b != b' ' && b != b'\t' && b != b'\n')
1177 .unwrap_or(0);
1178 let close_end = content.len() - trailing_ws;
1179 let close_search = &content[after_open_tag..close_end];
1180 if !close_search.to_ascii_lowercase().ends_with("</div>") {
1181 return None;
1182 }
1183 let close_start = after_open_tag + close_search.len() - "</div>".len();
1184 let inner = content[after_open_tag..close_start].trim_matches('\n');
1185 let mut blocks = parse_pandoc_blocks(inner);
1186 if !multiline
1187 && blocks.len() == 1
1188 && let Block::Para(inlines) = blocks.remove(0)
1189 {
1190 blocks.push(Block::Plain(inlines));
1191 }
1192 Some(Block::Div(attr, blocks))
1193}
1194
1195fn parse_pandoc_blocks(text: &str) -> Vec<Block> {
1199 if text.trim().is_empty() {
1200 return Vec::new();
1201 }
1202 let opts = crate::ParserOptions {
1203 flavor: crate::Flavor::Pandoc,
1204 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
1205 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
1206 ..crate::ParserOptions::default()
1207 };
1208 let doc = crate::parse(text, Some(opts));
1209 let mut out = Vec::new();
1210 for child in doc.children() {
1211 collect_block(&child, &mut out);
1212 }
1213 out
1214}
1215
1216fn tex_block(node: &SyntaxNode) -> Block {
1217 let mut content = node.text().to_string();
1218 while content.ends_with('\n') {
1219 content.pop();
1220 }
1221 Block::RawBlock("tex".to_string(), content)
1222}
1223
1224fn fenced_div(node: &SyntaxNode) -> Block {
1225 let attr = node
1226 .children()
1227 .find(|c| c.kind() == SyntaxKind::DIV_FENCE_OPEN)
1228 .map(|open| {
1229 let info = open
1230 .children()
1231 .find(|c| c.kind() == SyntaxKind::DIV_INFO)
1232 .map(|n| n.text().to_string())
1233 .unwrap_or_default();
1234 parse_div_info(info.trim())
1235 })
1236 .unwrap_or_default();
1237 let mut blocks = Vec::new();
1238 for child in node.children() {
1239 match child.kind() {
1240 SyntaxKind::DIV_FENCE_OPEN | SyntaxKind::DIV_FENCE_CLOSE => {}
1241 _ => collect_block(&child, &mut blocks),
1242 }
1243 }
1244 Block::Div(attr, blocks)
1245}
1246
1247fn parse_div_info(info: &str) -> Attr {
1250 if info.starts_with('{') && info.ends_with('}') {
1251 return parse_attr_block(&info[1..info.len() - 1]);
1252 }
1253 if !info.is_empty() {
1254 return Attr {
1255 id: String::new(),
1256 classes: vec![info.to_string()],
1257 kvs: Vec::new(),
1258 };
1259 }
1260 Attr::default()
1261}
1262
1263fn extract_attr_from_node(parent: &SyntaxNode) -> Attr {
1267 let raw = parent.children_with_tokens().find_map(|el| match el {
1268 NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
1269 NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => Some(t.text().to_string()),
1270 _ => None,
1271 });
1272 let Some(raw) = raw else {
1273 return Attr::default();
1274 };
1275 let trimmed = raw.trim();
1276 if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
1277 parse_attr_block(inner)
1278 } else {
1279 Attr::default()
1280 }
1281}
1282
1283fn parse_attr_block(s: &str) -> Attr {
1287 let mut id = String::new();
1288 let mut classes: Vec<String> = Vec::new();
1289 let mut kvs: Vec<(String, String)> = Vec::new();
1290 let bytes = s.as_bytes();
1291 let mut i = 0usize;
1292 while i < bytes.len() {
1293 match bytes[i] {
1294 b' ' | b'\t' | b'\n' | b'\r' => {
1295 i += 1;
1296 }
1297 b'#' => {
1298 let start = i + 1;
1299 let mut j = start;
1300 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
1301 j += 1;
1302 }
1303 id = s[start..j].to_string();
1304 i = j;
1305 }
1306 b'.' => {
1307 let start = i + 1;
1308 let mut j = start;
1309 while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
1310 j += 1;
1311 }
1312 classes.push(s[start..j].to_string());
1313 i = j;
1314 }
1315 _ => {
1316 let key_start = i;
1318 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
1319 i += 1;
1320 }
1321 let key = s[key_start..i].to_string();
1322 if i < bytes.len() && bytes[i] == b'=' {
1323 i += 1;
1324 let value = if i < bytes.len() && bytes[i] == b'"' {
1325 i += 1;
1326 let v_start = i;
1327 while i < bytes.len() && bytes[i] != b'"' {
1328 i += 1;
1329 }
1330 let v = s[v_start..i].to_string();
1331 if i < bytes.len() {
1332 i += 1;
1333 }
1334 v
1335 } else {
1336 let v_start = i;
1337 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
1338 i += 1;
1339 }
1340 s[v_start..i].to_string()
1341 };
1342 kvs.push((key, value));
1343 } else if !key.is_empty() {
1344 classes.push(key);
1346 }
1347 }
1348 }
1349 }
1350 Attr { id, classes, kvs }
1351}
1352
1353fn parse_html_attrs(s: &str) -> Attr {
1356 let mut id = String::new();
1357 let mut classes: Vec<String> = Vec::new();
1358 let mut kvs: Vec<(String, String)> = Vec::new();
1359 let bytes = s.as_bytes();
1360 let mut i = 0usize;
1361 while i < bytes.len() {
1362 match bytes[i] {
1363 b' ' | b'\t' | b'\n' | b'\r' => {
1364 i += 1;
1365 }
1366 _ => {
1367 let key_start = i;
1368 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
1369 i += 1;
1370 }
1371 let key = s[key_start..i].to_string();
1372 let value = if i < bytes.len() && bytes[i] == b'=' {
1373 i += 1;
1374 if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'') {
1375 let quote = bytes[i];
1376 i += 1;
1377 let v_start = i;
1378 while i < bytes.len() && bytes[i] != quote {
1379 i += 1;
1380 }
1381 let v = s[v_start..i].to_string();
1382 if i < bytes.len() {
1383 i += 1;
1384 }
1385 v
1386 } else {
1387 let v_start = i;
1388 while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
1389 i += 1;
1390 }
1391 s[v_start..i].to_string()
1392 }
1393 } else {
1394 String::new()
1395 };
1396 if key.is_empty() {
1397 continue;
1398 }
1399 match key.as_str() {
1400 "class" => {
1401 for c in value.split_ascii_whitespace() {
1402 classes.push(c.to_string());
1403 }
1404 }
1405 "id" => id = value,
1406 _ => kvs.push((key, value)),
1407 }
1408 }
1409 }
1410 }
1411 Attr { id, classes, kvs }
1412}
1413
1414fn definition_list(node: &SyntaxNode) -> Block {
1415 let items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = node
1416 .children()
1417 .filter(|c| c.kind() == SyntaxKind::DEFINITION_ITEM)
1418 .map(|item| {
1419 let term = item
1420 .children()
1421 .find(|c| c.kind() == SyntaxKind::TERM)
1422 .map(|t| coalesce_inlines(inlines_from(&t)))
1423 .unwrap_or_default();
1424 let loose = is_loose_definition_item(&item);
1425 let defs: Vec<Vec<Block>> = item
1426 .children()
1427 .filter(|c| c.kind() == SyntaxKind::DEFINITION)
1428 .map(|d| definition_blocks(&d, loose))
1429 .collect();
1430 (term, defs)
1431 })
1432 .collect();
1433 Block::DefinitionList(items)
1434}
1435
1436fn is_loose_definition_item(item: &SyntaxNode) -> bool {
1442 let mut saw_term = false;
1443 for child in item.children_with_tokens() {
1444 if let NodeOrToken::Node(n) = child {
1445 match n.kind() {
1446 SyntaxKind::TERM => {
1447 saw_term = true;
1448 }
1449 SyntaxKind::BLANK_LINE if saw_term => {
1450 return true;
1451 }
1452 SyntaxKind::DEFINITION => {
1453 return false;
1454 }
1455 _ => {}
1456 }
1457 }
1458 }
1459 false
1460}
1461
1462fn definition_blocks(def_node: &SyntaxNode, loose: bool) -> Vec<Block> {
1463 let extra = definition_content_offset(def_node);
1468 let mut out = Vec::new();
1469 for child in def_node.children() {
1470 match child.kind() {
1471 SyntaxKind::PLAIN => {
1472 let inlines = coalesce_inlines(inlines_from(&child));
1473 if loose {
1474 out.push(Block::Para(inlines));
1475 } else {
1476 out.push(Block::Plain(inlines));
1477 }
1478 }
1479 SyntaxKind::PARAGRAPH => {
1480 out.push(Block::Para(coalesce_inlines(inlines_from(&child))));
1481 }
1482 SyntaxKind::CODE_BLOCK if extra > 0 => {
1483 out.push(indented_code_block_with_extra_strip(&child, extra));
1484 }
1485 _ => collect_block(&child, &mut out),
1486 }
1487 }
1488 out
1489}
1490
1491fn definition_content_offset(def_node: &SyntaxNode) -> usize {
1496 let mut col = 0usize;
1497 let mut saw_marker = false;
1498 for el in def_node.children_with_tokens() {
1499 if let NodeOrToken::Token(t) = el {
1500 match t.kind() {
1501 SyntaxKind::DEFINITION_MARKER => {
1502 col = advance_col(col, t.text());
1503 saw_marker = true;
1504 }
1505 SyntaxKind::WHITESPACE if saw_marker => {
1506 return advance_col(col, t.text());
1507 }
1508 _ if saw_marker => return col,
1509 _ => {}
1510 }
1511 } else if saw_marker {
1512 return col;
1513 }
1514 }
1515 col
1516}
1517
/// Advances a column counter over the characters of `s`.
///
/// A tab moves to the next multiple of 4 (always by at least one column);
/// every other character advances the column by one.
fn advance_col(start: usize, s: &str) -> usize {
    s.chars().fold(start, |col, ch| match ch {
        '\t' => col - col % 4 + 4,
        _ => col + 1,
    })
}
1531
1532fn line_block(node: &SyntaxNode) -> Block {
1533 let lines: Vec<Vec<Inline>> = node
1534 .children()
1535 .filter(|c| c.kind() == SyntaxKind::LINE_BLOCK_LINE)
1536 .map(|line| {
1537 let mut out = Vec::new();
1538 for el in line.children_with_tokens() {
1539 match el {
1540 NodeOrToken::Token(t) => match t.kind() {
1541 SyntaxKind::LINE_BLOCK_MARKER | SyntaxKind::NEWLINE => {}
1542 _ => push_token_inline(&t, &mut out),
1543 },
1544 NodeOrToken::Node(n) => out.push(inline_from_node(&n)),
1545 }
1546 }
1547 coalesce_inlines(out)
1548 })
1549 .collect();
1550 Block::LineBlock(lines)
1551}
1552
1553fn latex_command_inline(node: &SyntaxNode) -> Inline {
1554 let content = node.text().to_string();
1555 Inline::RawInline("tex".to_string(), content)
1556}
1557
1558fn bracketed_span_inline(node: &SyntaxNode) -> Inline {
1559 let is_html = node
1560 .children_with_tokens()
1561 .any(|el| matches!(&el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::SPAN_BRACKET_OPEN && t.text().starts_with('<')));
1562 let attr_text = node.children_with_tokens().find_map(|el| match el {
1563 NodeOrToken::Token(t) if t.kind() == SyntaxKind::SPAN_ATTRIBUTES => {
1564 Some(t.text().to_string())
1565 }
1566 NodeOrToken::Node(n) if n.kind() == SyntaxKind::SPAN_ATTRIBUTES => {
1567 Some(n.text().to_string())
1568 }
1569 _ => None,
1570 });
1571 let attr = attr_text
1572 .map(|raw| {
1573 let trimmed = raw.trim();
1574 if is_html {
1575 parse_html_attrs(trimmed)
1576 } else if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}'))
1577 {
1578 parse_attr_block(inner)
1579 } else {
1580 Attr::default()
1581 }
1582 })
1583 .unwrap_or_default();
1584 let content = node
1585 .children()
1586 .find(|c| c.kind() == SyntaxKind::SPAN_CONTENT)
1587 .map(|n| coalesce_inlines(inlines_from(&n)))
1588 .unwrap_or_default();
1589 Inline::Span(attr, content)
1590}
1591
1592fn pipe_table(node: &SyntaxNode) -> Option<TableData> {
1593 let mut header_cells: Vec<Vec<Inline>> = Vec::new();
1594 let mut body_rows: Vec<Vec<Vec<Inline>>> = Vec::new();
1595 let mut aligns: Vec<&'static str> = Vec::new();
1596 let mut caption_inlines: Vec<Inline> = Vec::new();
1597 for child in node.children() {
1598 match child.kind() {
1599 SyntaxKind::TABLE_HEADER => {
1600 header_cells = pipe_table_cells(&child);
1601 }
1602 SyntaxKind::TABLE_SEPARATOR => {
1603 let raw = child.text().to_string();
1604 aligns = pipe_separator_aligns(&raw);
1605 }
1606 SyntaxKind::TABLE_ROW => {
1607 body_rows.push(pipe_table_cells(&child));
1608 }
1609 SyntaxKind::TABLE_CAPTION => {
1610 caption_inlines = pipe_table_caption(&child);
1611 }
1612 _ => {}
1613 }
1614 }
1615 let cols = header_cells
1616 .len()
1617 .max(body_rows.iter().map(Vec::len).max().unwrap_or(0))
1618 .max(aligns.len());
1619 if cols == 0 {
1620 return None;
1621 }
1622 while aligns.len() < cols {
1623 aligns.push("AlignDefault");
1624 }
1625 let head_rows = if header_cells.is_empty() {
1626 Vec::new()
1627 } else {
1628 vec![cells_to_plain_blocks(header_cells, cols)]
1629 };
1630 let body_rows: Vec<Vec<GridCell>> = body_rows
1631 .into_iter()
1632 .map(|cells| cells_to_plain_blocks(cells, cols))
1633 .collect();
1634 let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
1635 Some(TableData {
1636 attr,
1637 caption: caption_inlines,
1638 aligns,
1639 widths: vec![None; cols],
1640 head_rows,
1641 body_rows,
1642 foot_rows: Vec::new(),
1643 })
1644}
1645
1646fn pipe_table_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
1647 row.children()
1648 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
1649 .map(|cell| coalesce_inlines(inlines_from(&cell)))
1650 .collect()
1651}
1652
1653fn extract_caption_attrs(mut inlines: Vec<Inline>) -> (Attr, Vec<Inline>) {
1661 let last_str_end = inlines
1662 .iter()
1663 .rposition(|i| matches!(i, Inline::Str(s) if s.ends_with('}')));
1664 let Some(end_idx) = last_str_end else {
1665 return (Attr::default(), inlines);
1666 };
1667 let mut start_idx = end_idx;
1671 let mut found_open = false;
1672 loop {
1673 match &inlines[start_idx] {
1674 Inline::Str(s) => {
1675 if s.starts_with('{') {
1676 found_open = true;
1677 break;
1678 }
1679 }
1680 Inline::Space => {}
1681 _ => return (Attr::default(), inlines),
1682 }
1683 if start_idx == 0 {
1684 break;
1685 }
1686 start_idx -= 1;
1687 }
1688 if !found_open {
1689 return (Attr::default(), inlines);
1690 }
1691 let mut raw = String::new();
1694 for el in &inlines[start_idx..=end_idx] {
1695 match el {
1696 Inline::Str(s) => raw.push_str(s),
1697 Inline::Space => raw.push(' '),
1698 _ => return (Attr::default(), inlines),
1699 }
1700 }
1701 if !(raw.starts_with('{') && raw.ends_with('}')) {
1702 return (Attr::default(), inlines);
1703 }
1704 let inner = &raw[1..raw.len() - 1];
1705 let attr = parse_attr_block(inner);
1706 inlines.truncate(start_idx);
1707 if matches!(inlines.last(), Some(Inline::Space)) {
1708 inlines.pop();
1709 }
1710 (attr, inlines)
1711}
1712
1713fn pipe_table_caption(node: &SyntaxNode) -> Vec<Inline> {
1714 let mut out = Vec::new();
1716 let mut after_prefix = false;
1717 for el in node.children_with_tokens() {
1718 match el {
1719 NodeOrToken::Node(n) => {
1720 if n.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
1721 after_prefix = true;
1722 continue;
1723 }
1724 if after_prefix {
1725 out.push(inline_from_node(&n));
1726 }
1727 }
1728 NodeOrToken::Token(t) => {
1729 if t.kind() == SyntaxKind::TABLE_CAPTION_PREFIX {
1730 after_prefix = true;
1731 continue;
1732 }
1733 if after_prefix {
1734 push_token_inline(&t, &mut out);
1735 }
1736 }
1737 }
1738 }
1739 coalesce_inlines(out)
1740}
1741
/// Derives one alignment name per column from a pipe-table separator line.
///
/// Surrounding whitespace and all leading/trailing `|` characters are
/// stripped, then the line is split on `|`. For each trimmed segment a
/// leading `:` means left, a trailing `:` means right, both mean center, and
/// neither means default.
fn pipe_separator_aligns(raw: &str) -> Vec<&'static str> {
    let row = raw
        .trim()
        .trim_start_matches('|')
        .trim_end_matches('|');
    let mut aligns = Vec::new();
    for segment in row.split('|') {
        let cell = segment.trim();
        let align = if cell.starts_with(':') {
            if cell.ends_with(':') {
                "AlignCenter"
            } else {
                "AlignLeft"
            }
        } else if cell.ends_with(':') {
            "AlignRight"
        } else {
            "AlignDefault"
        };
        aligns.push(align);
    }
    aligns
}
1764
1765fn cells_to_plain_blocks(cells: Vec<Vec<Inline>>, cols: usize) -> Vec<GridCell> {
1766 let mut out: Vec<GridCell> = cells
1767 .into_iter()
1768 .map(|inlines| {
1769 let blocks = if inlines.is_empty() {
1770 Vec::new()
1771 } else {
1772 vec![Block::Plain(inlines)]
1773 };
1774 GridCell::no_span(blocks)
1775 })
1776 .collect();
1777 while out.len() < cols {
1778 out.push(GridCell::no_span(Vec::new()));
1779 }
1780 out
1781}
1782
/// Formats a float with a guaranteed fractional part.
///
/// - Zero is `"0.0"`.
/// - Magnitudes in `[0.1, 1e7)` use plain decimal notation, with `.0`
///   appended when the default formatting has no fractional part.
/// - All other values use exponent notation, with `.0` inserted before the
///   `e` when the mantissa has no fractional part (e.g. `1.0e-2`).
fn show_double(x: f64) -> String {
    if x == 0.0 {
        return String::from("0.0");
    }
    let magnitude = x.abs();
    if (0.1..1e7).contains(&magnitude) {
        let mut text = x.to_string();
        if !(text.contains('.') || text.contains('e')) {
            text.push_str(".0");
        }
        return text;
    }
    let sci = format!("{x:e}");
    match sci.split_once('e') {
        Some((mantissa, exponent)) if !mantissa.contains('.') => {
            format!("{mantissa}.0e{exponent}")
        }
        _ => sci,
    }
}
1813
1814fn simple_table(node: &SyntaxNode) -> Option<TableData> {
1831 let separator = node
1832 .children()
1833 .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)?;
1834 let cols = simple_table_dash_runs(&separator);
1835 if cols.is_empty() {
1836 return None;
1837 }
1838 let header = node
1839 .children()
1840 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
1841 let mut body_rows_nodes: Vec<SyntaxNode> = node
1845 .children()
1846 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
1847 .collect();
1848 if header.is_none()
1849 && body_rows_nodes
1850 .last()
1851 .map(simple_table_row_is_all_dashes)
1852 .unwrap_or(false)
1853 {
1854 body_rows_nodes.pop();
1855 }
1856 let aligns = if let Some(h) = &header {
1858 simple_table_aligns(h, &cols)
1859 } else if let Some(r0) = body_rows_nodes.first() {
1860 simple_table_aligns(r0, &cols)
1861 } else {
1862 vec!["AlignDefault"; cols.len()]
1863 };
1864 let head_rows = match &header {
1865 Some(h) => {
1866 let cells: Vec<Vec<Inline>> = simple_table_row_cells(h);
1867 vec![cells_to_plain_blocks(cells, cols.len())]
1868 }
1869 None => Vec::new(),
1870 };
1871 let body_rows: Vec<Vec<GridCell>> = body_rows_nodes
1872 .iter()
1873 .map(|r| cells_to_plain_blocks(simple_table_row_cells(r), cols.len()))
1874 .collect();
1875 let caption_inlines = node
1876 .children()
1877 .find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
1878 .map(|n| pipe_table_caption(&n))
1879 .unwrap_or_default();
1880 let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
1881 Some(TableData {
1882 attr,
1883 caption: caption_inlines,
1884 aligns,
1885 widths: vec![None; cols.len()],
1886 head_rows,
1887 body_rows,
1888 foot_rows: Vec::new(),
1889 })
1890}
1891
1892fn simple_table_dash_runs(separator: &SyntaxNode) -> Vec<(usize, usize)> {
1896 let raw = separator.text().to_string();
1897 let line = raw.trim_end_matches(['\n', '\r']);
1898 let mut runs = Vec::new();
1899 let mut start: Option<usize> = None;
1900 for (i, ch) in line.char_indices() {
1901 if ch == '-' {
1902 if start.is_none() {
1903 start = Some(i);
1904 }
1905 } else if let Some(s) = start.take() {
1906 runs.push((s, i - 1));
1907 }
1908 }
1909 if let Some(s) = start.take() {
1910 runs.push((s, line.len() - 1));
1911 }
1912 runs
1913}
1914
1915fn simple_table_row_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
1916 row.children()
1921 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
1922 .map(|cell| coalesce_inlines(inlines_from(&cell)))
1923 .collect()
1924}
1925
1926fn simple_table_row_is_all_dashes(row: &SyntaxNode) -> bool {
1927 let mut had_cell = false;
1928 for cell in row
1929 .children()
1930 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
1931 {
1932 let text = cell.text().to_string();
1933 let trimmed = text.trim();
1934 if trimmed.is_empty() {
1935 continue;
1936 }
1937 had_cell = true;
1938 if !trimmed.chars().all(|c| c == '-') {
1939 return false;
1940 }
1941 }
1942 had_cell
1943}
1944
1945fn simple_table_aligns(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<&'static str> {
1953 let row_start: u32 = row.text_range().start().into();
1954 let mut cell_ranges: Vec<(usize, usize)> = Vec::new();
1955 for cell in row
1956 .children()
1957 .filter(|c| c.kind() == SyntaxKind::TABLE_CELL)
1958 {
1959 if cell.text_range().is_empty() {
1960 continue;
1961 }
1962 let text = cell.text().to_string();
1963 let lstrip = text.chars().take_while(|c| *c == ' ' || *c == '\t').count();
1964 let rstrip = text
1965 .chars()
1966 .rev()
1967 .take_while(|c| *c == ' ' || *c == '\t')
1968 .count();
1969 let trimmed_len = text.chars().count().saturating_sub(lstrip + rstrip);
1970 if trimmed_len == 0 {
1971 continue;
1972 }
1973 let start: u32 = cell.text_range().start().into();
1974 let s = (start - row_start) as usize;
1975 let visible_start = s + lstrip;
1976 let visible_end = visible_start + trimmed_len - 1;
1977 cell_ranges.push((visible_start, visible_end));
1978 }
1979 cols.iter()
1980 .map(|(col_start, col_end)| {
1981 let cell = cell_ranges
1982 .iter()
1983 .find(|(cs, ce)| ce >= col_start && cs <= col_end);
1984 match cell {
1985 Some((cs, ce)) => {
1986 let left_flush = cs == col_start;
1987 let right_flush = ce == col_end;
1988 match (left_flush, right_flush) {
1989 (true, true) => "AlignDefault",
1990 (true, false) => "AlignLeft",
1991 (false, true) => "AlignRight",
1992 (false, false) => "AlignCenter",
1993 }
1994 }
1995 None => "AlignDefault",
1996 }
1997 })
1998 .collect()
1999}
2000
2001#[allow(clippy::needless_range_loop)]
2028fn grid_table(node: &SyntaxNode) -> Option<TableData> {
2029 let mut tagged: Vec<(SyntaxKind, String)> = Vec::new();
2031 for child in node.children() {
2032 if child.kind() == SyntaxKind::TABLE_CAPTION {
2033 continue;
2034 }
2035 let text = child.text().to_string();
2036 for line in text.split_inclusive('\n') {
2037 let trimmed = line.trim_end_matches('\n');
2038 tagged.push((child.kind(), trimmed.to_string()));
2039 }
2040 }
2041 if tagged.is_empty() {
2042 return None;
2043 }
2044
2045 let max_width = tagged
2047 .iter()
2048 .map(|(_, l)| l.chars().count())
2049 .max()
2050 .unwrap_or(0);
2051 let grid: Vec<Vec<char>> = tagged
2052 .iter()
2053 .map(|(_, l)| {
2054 let mut chars: Vec<char> = l.chars().collect();
2055 chars.resize(max_width, ' ');
2056 chars
2057 })
2058 .collect();
2059 let nlines = grid.len();
2060
2061 let is_sep_line: Vec<bool> = grid
2065 .iter()
2066 .map(|row| {
2067 row.contains(&'+')
2068 && row
2069 .iter()
2070 .all(|&c| matches!(c, '+' | '-' | '=' | ':' | '|' | ' '))
2071 })
2072 .collect();
2073
2074 let mut col_set: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
2076 for (i, row) in grid.iter().enumerate() {
2077 if !is_sep_line[i] {
2078 continue;
2079 }
2080 for (j, &c) in row.iter().enumerate() {
2081 if c == '+' {
2082 col_set.insert(j);
2083 }
2084 }
2085 }
2086 let cols_pos: Vec<usize> = col_set.into_iter().collect();
2087 if cols_pos.len() < 2 {
2088 return None;
2089 }
2090 let ncols = cols_pos.len() - 1;
2091
2092 let row_seps: Vec<usize> = (0..nlines).filter(|&i| is_sep_line[i]).collect();
2094 if row_seps.len() < 2 {
2095 return None;
2096 }
2097 let nrows = row_seps.len() - 1;
2098
2099 let mut block_kind: Vec<&'static str> = vec!["body"; nrows];
2102 for r in 0..nrows {
2103 let start = row_seps[r];
2104 let end = row_seps[r + 1];
2105 for i in (start + 1)..end {
2106 match tagged[i].0 {
2107 SyntaxKind::TABLE_HEADER => block_kind[r] = "head",
2108 SyntaxKind::TABLE_FOOTER => block_kind[r] = "foot",
2109 _ => {}
2110 }
2111 }
2112 }
2113
2114 let mut occupied = vec![vec![false; ncols]; nrows];
2116 let mut cells: Vec<(usize, usize, u32, u32, String)> = Vec::new();
2118 for sr in 0..nrows {
2119 for sc in 0..ncols {
2120 if occupied[sr][sc] {
2121 continue;
2122 }
2123 let i = row_seps[sr];
2124 let j = cols_pos[sc];
2125 if grid[i][j] != '+' {
2126 continue;
2131 }
2132 let Some((er, ec, content)) = find_grid_cell(&grid, i, j, sr, sc, &cols_pos, &row_seps)
2133 else {
2134 continue;
2135 };
2136 let row_span = (er - sr) as u32;
2137 let col_span = (ec - sc) as u32;
2138 for r in sr..er {
2139 for c in sc..ec {
2140 occupied[r][c] = true;
2141 }
2142 }
2143 cells.push((sr, sc, row_span, col_span, content));
2144 }
2145 }
2146
2147 let mut head_rows: Vec<Vec<GridCell>> = Vec::new();
2150 let mut body_rows: Vec<Vec<GridCell>> = Vec::new();
2151 let mut foot_rows: Vec<Vec<GridCell>> = Vec::new();
2152 for r in 0..nrows {
2153 let mut row_cells: Vec<&(usize, usize, u32, u32, String)> =
2154 cells.iter().filter(|(sr, _, _, _, _)| *sr == r).collect();
2155 row_cells.sort_by_key(|(_, sc, _, _, _)| *sc);
2156 let row: Vec<GridCell> = row_cells
2157 .into_iter()
2158 .map(|(_, _, rs, cs, text)| {
2159 let blocks = parse_grid_cell_text(text);
2160 GridCell {
2161 row_span: *rs,
2162 col_span: *cs,
2163 blocks,
2164 }
2165 })
2166 .collect();
2167 match block_kind[r] {
2168 "head" => head_rows.push(row),
2169 "foot" => foot_rows.push(row),
2170 _ => body_rows.push(row),
2171 }
2172 }
2173
2174 let alignment_sep = node
2177 .children()
2178 .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
2179 .find(|c| c.text().to_string().contains(':'))
2180 .or_else(|| {
2181 node.children()
2182 .find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
2183 })?;
2184 let widths = grid_dash_widths(&alignment_sep);
2185 let aligns_raw = alignment_sep.text().to_string();
2186 let aligns = if aligns_raw.contains(':') {
2187 grid_separator_aligns(&aligns_raw, ncols)
2188 } else {
2189 vec!["AlignDefault"; ncols]
2190 };
2191
2192 let caption_inlines = node
2194 .children()
2195 .find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
2196 .map(|n| pipe_table_caption(&n))
2197 .unwrap_or_default();
2198 let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
2199
2200 Some(TableData {
2201 attr,
2202 caption: caption_inlines,
2203 aligns,
2204 widths: widths.into_iter().map(Some).collect(),
2205 head_rows,
2206 body_rows,
2207 foot_rows,
2208 })
2209}
2210
/// Finds the smallest closed rectangle whose top-left corner is at grid line
/// `i`, character column `j` (logical row `sr`, logical column `sc`).
///
/// Candidate right edges are tried left to right and, for each, candidate
/// bottom edges top to bottom. A candidate is accepted when
/// - the top and bottom edges consist only of `- = : +`,
/// - the left and right edges consist only of `|` and `+`,
/// - both bottom corners are `+`, and
/// - no interior line runs as a full rule from left edge to right edge (that
///   would split the rectangle into two cells).
///
/// Returns the exclusive logical end row and column plus the cell text. For
/// the text, one leading space and all trailing whitespace are removed from
/// each line, and leading/trailing empty lines are dropped. Returns `None`
/// when no rectangle closes.
///
/// `row_seps` and `cols_pos` must each be non-empty.
#[allow(clippy::needless_range_loop)]
fn find_grid_cell(
    grid: &[Vec<char>],
    i: usize,
    j: usize,
    sr: usize,
    sc: usize,
    cols_pos: &[usize],
    row_seps: &[usize],
) -> Option<(usize, usize, String)> {
    let last_row = row_seps.len() - 1;
    let last_col = cols_pos.len() - 1;

    let is_rule = |ch: char| matches!(ch, '-' | '=' | ':' | '+');
    let is_wall = |ch: char| matches!(ch, '|' | '+');
    // Every character strictly between column `j` and `right` on `line` is a rule character.
    let ruled = |line: usize, right: usize| (j + 1..right).all(|c| is_rule(grid[line][c]));
    // Every line strictly between `i` and `bottom` has a wall character at `col`.
    let walled = |col: usize, bottom: usize| (i + 1..bottom).all(|r| is_wall(grid[r][col]));

    for ec in (sc + 1)..=last_col {
        let right = cols_pos[ec];
        // A gap in the top edge rules out this and every wider candidate.
        if !ruled(i, right) {
            break;
        }
        for er in (sr + 1)..=last_row {
            let bottom = row_seps[er];
            // A gap in the left edge rules out this and every taller candidate.
            if !walled(j, bottom) {
                break;
            }
            let closed = walled(right, bottom)
                && ruled(bottom, right)
                && grid[bottom][j] == '+'
                && grid[bottom][right] == '+';
            if !closed {
                continue;
            }
            let split_inside = (i + 1..bottom)
                .any(|m| grid[m][j] == '+' && grid[m][right] == '+' && ruled(m, right));
            if split_inside {
                continue;
            }

            let lines: Vec<String> = (i + 1..bottom)
                .map(|r| {
                    let raw: String = grid[r][j + 1..right].iter().collect();
                    raw.strip_prefix(' ')
                        .unwrap_or(&raw)
                        .trim_end()
                        .to_string()
                })
                .collect();
            let content = match lines.iter().position(|text| !text.is_empty()) {
                None => String::new(),
                Some(first) => {
                    let last = lines
                        .iter()
                        .rposition(|text| !text.is_empty())
                        .unwrap_or(first);
                    lines[first..=last].join("\n")
                }
            };
            return Some((er, ec, content));
        }
    }
    None
}
2296
2297fn parse_grid_cell_text(text: &str) -> Vec<Block> {
2301 if text.trim().is_empty() {
2302 return Vec::new();
2303 }
2304 let opts = crate::ParserOptions {
2305 flavor: crate::Flavor::Pandoc,
2306 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
2307 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
2308 ..crate::ParserOptions::default()
2309 };
2310 let doc = crate::parse(text, Some(opts));
2311 let mut out = Vec::new();
2312 for child in doc.children() {
2313 if let Some(block) = block_from(&child) {
2314 let block = match block {
2315 Block::Para(inlines) => Block::Plain(inlines),
2316 other => other,
2317 };
2318 out.push(block);
2319 }
2320 }
2321 out
2322}
2323
2324fn grid_dash_widths(separator: &SyntaxNode) -> Vec<f64> {
2335 let raw_text = separator.text().to_string();
2336 let line = raw_text.trim_end_matches(['\n', '\r']);
2337 let mut raw: Vec<usize> = Vec::new();
2338 let mut count: usize = 0;
2339 let mut in_col = false;
2340 for ch in line.chars() {
2341 match ch {
2342 '+' => {
2343 if in_col {
2344 raw.push(count + 1);
2345 count = 0;
2346 }
2347 in_col = true;
2348 }
2349 _ => {
2350 if in_col {
2351 count += 1;
2352 }
2353 }
2354 }
2355 }
2356 if raw.is_empty() {
2357 return Vec::new();
2358 }
2359 let total: usize = raw.iter().sum();
2360 let count = raw.len();
2361 let norm = (total + count).saturating_sub(2).max(72) as f64;
2362 raw.into_iter().map(|w| w as f64 / norm).collect()
2363}
2364
2365fn grid_separator_aligns(raw: &str, cols: usize) -> Vec<&'static str> {
2366 let line = raw.trim_end_matches(['\n', '\r']);
2367 let mut aligns: Vec<&'static str> = Vec::with_capacity(cols);
2368 let mut col_start: Option<usize> = None;
2369 for (i, ch) in line.char_indices() {
2370 if ch == '+' {
2371 if let Some(s) = col_start.take() {
2372 let seg = &line[s..i];
2373 aligns.push(grid_segment_align(seg));
2374 }
2375 col_start = Some(i + 1);
2376 }
2377 }
2378 while aligns.len() < cols {
2379 aligns.push("AlignDefault");
2380 }
2381 aligns.truncate(cols);
2382 aligns
2383}
2384
/// Classifies one separator segment by its colon markers.
///
/// A `:` at both ends means center, only at the start means left, only at
/// the end means right, and none means default. A lone `:` counts as both
/// ends.
fn grid_segment_align(seg: &str) -> &'static str {
    let opens = seg.starts_with(':');
    let closes = seg.ends_with(':');
    if opens && closes {
        "AlignCenter"
    } else if opens {
        "AlignLeft"
    } else if closes {
        "AlignRight"
    } else {
        "AlignDefault"
    }
}
2396
2397fn multiline_table(node: &SyntaxNode) -> Option<TableData> {
2407 let separators: Vec<SyntaxNode> = node
2410 .children()
2411 .filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
2412 .collect();
2413 let header = node
2414 .children()
2415 .find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
2416 let column_sep = if header.is_some() {
2417 separators.get(1).cloned()
2418 } else {
2419 separators.first().cloned()
2420 }?;
2421 let cols = simple_table_dash_runs(&column_sep);
2422 if cols.is_empty() {
2423 return None;
2424 }
2425 let raw: Vec<usize> = cols
2430 .iter()
2431 .enumerate()
2432 .map(|(i, (s, e))| {
2433 if i + 1 < cols.len() {
2434 cols[i + 1].0 - s
2435 } else {
2436 e - s + 2
2437 }
2438 })
2439 .collect();
2440 let total: usize = raw.iter().sum();
2441 let norm = (total.max(72)) as f64;
2442 let widths: Vec<f64> = raw.into_iter().map(|w| w as f64 / norm).collect();
2443 let aligns = if let Some(h) = &header {
2446 simple_table_aligns(h, &cols)
2447 } else if let Some(r0) = node.children().find(|c| c.kind() == SyntaxKind::TABLE_ROW) {
2448 simple_table_aligns(&r0, &cols)
2449 } else {
2450 vec!["AlignDefault"; cols.len()]
2451 };
2452 let head_rows = match &header {
2453 Some(h) => vec![
2454 multiline_row_cells_blocks(h, &cols)
2455 .into_iter()
2456 .map(GridCell::no_span)
2457 .collect(),
2458 ],
2459 None => Vec::new(),
2460 };
2461 let body_rows: Vec<Vec<GridCell>> = node
2462 .children()
2463 .filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
2464 .map(|r| {
2465 multiline_row_cells_blocks(&r, &cols)
2466 .into_iter()
2467 .map(GridCell::no_span)
2468 .collect()
2469 })
2470 .collect();
2471 let caption_inlines = node
2472 .children()
2473 .find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
2474 .map(|n| pipe_table_caption(&n))
2475 .unwrap_or_default();
2476 let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
2477 Some(TableData {
2478 attr,
2479 caption: caption_inlines,
2480 aligns,
2481 widths: widths.into_iter().map(Some).collect(),
2482 head_rows,
2483 body_rows,
2484 foot_rows: Vec::new(),
2485 })
2486}
2487
2488fn multiline_row_cells_blocks(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<Vec<Block>> {
2492 let row_start: u32 = row.text_range().start().into();
2493 let raw = row.text().to_string();
2494 let lines: Vec<&str> = raw.split_inclusive('\n').collect();
2498 let mut col_lines: Vec<Vec<String>> = vec![Vec::new(); cols.len()];
2499 let mut line_start_offset: usize = 0;
2500 for line in lines {
2501 let line_no_nl = line.trim_end_matches('\n');
2502 if line_no_nl.trim().is_empty() {
2503 line_start_offset += line.len();
2504 continue;
2505 }
2506 for (i, &(cs, ce)) in cols.iter().enumerate() {
2507 let slice = char_slice(line_no_nl, cs, ce + 1);
2509 let trimmed = slice.trim();
2510 if !trimmed.is_empty() {
2511 col_lines[i].push(trimmed.to_string());
2512 }
2513 }
2514 line_start_offset += line.len();
2515 }
2516 let _ = (row_start, line_start_offset);
2517 cols.iter()
2518 .enumerate()
2519 .map(|(i, _)| {
2520 let segments = &col_lines[i];
2521 if segments.is_empty() {
2522 return Vec::new();
2523 }
2524 let joined = segments.join("\n");
2530 let inlines = parse_cell_text_inlines(&joined);
2531 if inlines.is_empty() {
2532 return Vec::new();
2533 }
2534 vec![Block::Plain(coalesce_inlines(inlines))]
2535 })
2536 .collect()
2537}
2538
2539fn parse_cell_text_inlines(text: &str) -> Vec<Inline> {
2545 if text.trim().is_empty() {
2546 return Vec::new();
2547 }
2548 let opts = crate::ParserOptions {
2549 flavor: crate::Flavor::Pandoc,
2550 dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
2551 extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
2552 ..crate::ParserOptions::default()
2553 };
2554 let doc = crate::parse(text, Some(opts));
2555 for node in doc.descendants() {
2556 if matches!(node.kind(), SyntaxKind::PARAGRAPH | SyntaxKind::PLAIN) {
2557 return inlines_from(&node);
2558 }
2559 }
2560 Vec::new()
2561}
2562
/// Returns the sub-slice of `s` covering characters `start_char..end_char`.
///
/// Positions are counted in characters, not bytes. Positions past the end of
/// the string are clamped to its end, and an empty or inverted range yields
/// `""`.
fn char_slice(s: &str, start_char: usize, end_char: usize) -> &str {
    if start_char >= end_char {
        return "";
    }
    // Byte offsets of successive characters; the second lookup continues
    // from where the first one stopped.
    let mut offsets = s.char_indices().map(|(byte, _)| byte);
    let from = offsets.nth(start_char).unwrap_or(s.len());
    let to = offsets
        .nth(end_char - start_char - 1)
        .unwrap_or(s.len());
    &s[from..to]
}
2580
2581fn list_block(node: &SyntaxNode) -> Block {
2582 let loose = is_loose_list(node);
2583 let items: Vec<Vec<Block>> = node
2584 .children()
2585 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
2586 .map(|item| list_item_blocks(&item, loose))
2587 .collect();
2588 if list_is_ordered(node) {
2589 let (start, style, delim) = ordered_list_attrs(node);
2590 Block::OrderedList(start, style, delim, items)
2591 } else {
2592 Block::BulletList(items)
2593 }
2594}
2595
2596fn list_is_ordered(node: &SyntaxNode) -> bool {
2597 let Some(item) = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
2598 return false;
2599 };
2600 let marker = item
2601 .children_with_tokens()
2602 .filter_map(|el| el.into_token())
2603 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
2604 .map(|t| t.text().to_string())
2605 .unwrap_or_default();
2606 let trimmed = marker.trim();
2607 !trimmed.starts_with(['-', '+', '*'])
2608}
2609
2610fn ordered_list_attrs(node: &SyntaxNode) -> (usize, &'static str, &'static str) {
2611 let item = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM);
2612 let marker = item
2613 .as_ref()
2614 .and_then(|i| {
2615 i.children_with_tokens()
2616 .filter_map(|el| el.into_token())
2617 .find(|t| t.kind() == SyntaxKind::LIST_MARKER)
2618 .map(|t| t.text().to_string())
2619 })
2620 .unwrap_or_default();
2621 let (mut start, style, delim) = classify_ordered_marker(marker.trim());
2622 if style == "Example" {
2623 let offset: u32 = node.text_range().start().into();
2624 if let Some(s) = REFS_CTX.with(|c| {
2625 c.borrow()
2626 .example_list_start_by_offset
2627 .get(&offset)
2628 .copied()
2629 }) {
2630 start = s;
2631 }
2632 }
2633 (start, style, delim)
2634}
2635
2636fn classify_ordered_marker(trimmed: &str) -> (usize, &'static str, &'static str) {
2644 let (body, delim) =
2646 if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
2647 (inner, "TwoParens")
2648 } else if let Some(inner) = trimmed.strip_suffix(')') {
2649 (inner, "OneParen")
2650 } else if let Some(inner) = trimmed.strip_suffix('.') {
2651 (inner, "Period")
2652 } else {
2653 (trimmed, "DefaultDelim")
2654 };
2655
2656 if !body.is_empty() && body.chars().all(|c| c.is_ascii_digit()) {
2658 let start: usize = body.parse().unwrap_or(1);
2659 return (start, "Decimal", delim);
2660 }
2661
2662 if body == "#" {
2665 return (1, "DefaultStyle", "DefaultDelim");
2666 }
2667
2668 if let Some(rest) = body.strip_prefix('@')
2670 && rest
2671 .chars()
2672 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2673 {
2674 return (1, "Example", delim);
2675 }
2676
2677 if body == "i" {
2679 return (1, "LowerRoman", delim);
2680 }
2681 if body == "I" {
2682 return (1, "UpperRoman", delim);
2683 }
2684
2685 if body.len() == 1
2687 && let Some(c) = body.chars().next()
2688 {
2689 if c.is_ascii_lowercase() {
2690 return ((c as u8 - b'a') as usize + 1, "LowerAlpha", delim);
2691 }
2692 if c.is_ascii_uppercase() {
2693 return ((c as u8 - b'A') as usize + 1, "UpperAlpha", delim);
2694 }
2695 }
2696
2697 if body
2699 .chars()
2700 .all(|c| matches!(c, 'i' | 'v' | 'x' | 'l' | 'c' | 'd' | 'm'))
2701 && let Some(n) = roman_to_int(body, false)
2702 {
2703 return (n, "LowerRoman", delim);
2704 }
2705 if body
2706 .chars()
2707 .all(|c| matches!(c, 'I' | 'V' | 'X' | 'L' | 'C' | 'D' | 'M'))
2708 && let Some(n) = roman_to_int(body, true)
2709 {
2710 return (n, "UpperRoman", delim);
2711 }
2712
2713 (1, "Decimal", delim)
2716}
2717
/// Converts a roman numeral to its integer value.
///
/// With `upper == true` the input must already be uppercase; otherwise every
/// character is ASCII-uppercased before lookup. Returns `None` for an empty
/// string or when any character is not one of `I V X L C D M`.
///
/// The conversion is lenient: a digit followed by a larger one is read as a
/// subtractive pair (`IV` = 4, and also `IM` = 999), and repeated digits are
/// simply added (`IIII` = 4). No canonical-form validation is performed.
fn roman_to_int(s: &str, upper: bool) -> Option<usize> {
    fn digit_value(c: char) -> Option<usize> {
        Some(match c {
            'I' => 1,
            'V' => 5,
            'X' => 10,
            'L' => 50,
            'C' => 100,
            'D' => 500,
            'M' => 1000,
            _ => return None,
        })
    }

    if s.is_empty() {
        return None;
    }

    // Resolve every character first; one unknown digit rejects the input.
    let mut digits = Vec::with_capacity(s.len());
    for c in s.chars() {
        let c = if upper { c } else { c.to_ascii_uppercase() };
        digits.push(digit_value(c)?);
    }

    let mut sum = 0usize;
    let mut pos = 0;
    while let Some(&current) = digits.get(pos) {
        match digits.get(pos + 1) {
            // Smaller digit before a larger one: consume both as a pair.
            Some(&following) if current < following => {
                sum += following - current;
                pos += 2;
            }
            _ => {
                sum += current;
                pos += 1;
            }
        }
    }
    Some(sum)
}
2755
2756fn list_item_blocks(item: &SyntaxNode, loose: bool) -> Vec<Block> {
2757 let mut out = Vec::new();
2758 let item_indent = list_item_content_offset(item);
2759 let task_checkbox = task_checkbox_for_item(item);
2760 let mut checkbox_emitted = false;
2761 for child in item.children() {
2762 match child.kind() {
2763 SyntaxKind::PLAIN => {
2764 let mut inlines = coalesce_inlines(inlines_from(&child));
2765 if inlines.is_empty() {
2770 continue;
2771 }
2772 if !checkbox_emitted && let Some(glyph) = task_checkbox {
2773 inlines.insert(0, Inline::Space);
2774 inlines.insert(0, Inline::Str(glyph.to_string()));
2775 checkbox_emitted = true;
2776 }
2777 if loose {
2778 out.push(Block::Para(inlines));
2779 } else {
2780 out.push(Block::Plain(inlines));
2781 }
2782 }
2783 SyntaxKind::CODE_BLOCK => {
2784 out.push(indented_code_block_with_extra_strip(&child, item_indent));
2792 }
2793 _ => collect_block(&child, &mut out),
2794 }
2795 }
2796 out
2797}
2798
2799fn task_checkbox_for_item(item: &SyntaxNode) -> Option<&'static str> {
2804 item.children_with_tokens()
2805 .filter_map(|el| el.into_token())
2806 .find(|t| t.kind() == SyntaxKind::TASK_CHECKBOX)
2807 .map(|t| {
2808 let text = t.text();
2809 if text.contains('x') || text.contains('X') {
2810 "\u{2612}"
2811 } else {
2812 "\u{2610}"
2813 }
2814 })
2815}
2816
2817fn list_item_content_offset(item: &SyntaxNode) -> usize {
2836 let parent_ws = parent_list_leading_ws(item);
2837 let mut marker_width = 0usize;
2838 let mut leading_ws = 0usize;
2839 let mut saw_marker = false;
2840 for el in item.children_with_tokens() {
2841 if let NodeOrToken::Token(t) = el {
2842 match t.kind() {
2843 SyntaxKind::WHITESPACE if !saw_marker => {
2844 leading_ws += t.text().chars().count();
2845 }
2846 SyntaxKind::LIST_MARKER => {
2847 marker_width += t.text().chars().count();
2848 saw_marker = true;
2849 }
2850 SyntaxKind::WHITESPACE if saw_marker => {
2851 return parent_ws + leading_ws + marker_width + t.text().chars().count();
2852 }
2853 _ if saw_marker => {
2854 return parent_ws + leading_ws + marker_width;
2855 }
2856 _ => {}
2857 }
2858 } else if saw_marker {
2859 return parent_ws + leading_ws + marker_width;
2860 }
2861 }
2862 parent_ws + leading_ws + marker_width
2863}
2864
2865fn parent_list_leading_ws(item: &SyntaxNode) -> usize {
2870 let prev = item.prev_sibling_or_token();
2871 match prev {
2872 Some(NodeOrToken::Token(t)) if t.kind() == SyntaxKind::WHITESPACE => {
2873 t.text().chars().count()
2874 }
2875 _ => 0,
2876 }
2877}
2878
2879fn is_loose_list(node: &SyntaxNode) -> bool {
2880 let mut prev_was_item = false;
2881 for child in node.children_with_tokens() {
2882 if let NodeOrToken::Node(n) = child {
2883 if n.kind() == SyntaxKind::LIST_ITEM {
2884 prev_was_item = true;
2885 } else if n.kind() == SyntaxKind::BLANK_LINE
2886 && prev_was_item
2887 && n.next_sibling()
2888 .map(|s| s.kind() == SyntaxKind::LIST_ITEM)
2889 .unwrap_or(false)
2890 {
2891 return true;
2892 }
2893 }
2894 }
2895 for item in node
2896 .children()
2897 .filter(|c| c.kind() == SyntaxKind::LIST_ITEM)
2898 {
2899 if item.children().any(|c| c.kind() == SyntaxKind::PARAGRAPH) {
2900 return true;
2901 }
2902 if has_internal_blank_between_blocks(&item) {
2907 return true;
2908 }
2909 }
2910 false
2911}
2912
2913fn has_internal_blank_between_blocks(item: &SyntaxNode) -> bool {
2914 let mut saw_block_before = false;
2915 let mut pending_blank = false;
2916 for child in item.children() {
2917 match child.kind() {
2918 SyntaxKind::BLANK_LINE => {
2919 if saw_block_before {
2920 pending_blank = true;
2921 }
2922 }
2923 SyntaxKind::PLAIN if child_is_empty_plain(&child) => {}
2927 _ => {
2928 if pending_blank {
2929 return true;
2930 }
2931 saw_block_before = true;
2932 }
2933 }
2934 }
2935 false
2936}
2937
2938fn child_is_empty_plain(node: &SyntaxNode) -> bool {
2939 !node.children_with_tokens().any(|el| match el {
2940 NodeOrToken::Token(t) => !matches!(t.kind(), SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE),
2941 NodeOrToken::Node(_) => true,
2942 })
2943}
2944
2945fn inlines_from(parent: &SyntaxNode) -> Vec<Inline> {
2948 let mut out = Vec::new();
2949 let mut iter = parent.children_with_tokens().peekable();
2950 while let Some(el) = iter.next() {
2951 match el {
2952 NodeOrToken::Token(t) => push_token_inline(&t, &mut out),
2953 NodeOrToken::Node(n) if n.kind() == SyntaxKind::LATEX_COMMAND => {
2954 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
2955 }
2956 NodeOrToken::Node(n) if n.kind() == SyntaxKind::CITATION => {
2957 emit_citation_with_absorb(&n, &mut iter, &mut out);
2958 }
2959 NodeOrToken::Node(n) => push_inline_node(&n, &mut out),
2960 }
2961 }
2962 while matches!(out.last(), Some(Inline::SoftBreak)) {
2966 out.pop();
2967 }
2968 out
2969}
2970
2971fn emit_citation_with_absorb<I>(
2979 node: &SyntaxNode,
2980 iter: &mut std::iter::Peekable<I>,
2981 out: &mut Vec<Inline>,
2982) where
2983 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
2984{
2985 let bracketed = node
2986 .children_with_tokens()
2987 .filter_map(|el| el.into_token())
2988 .any(|t| t.kind() == SyntaxKind::LINK_START);
2989 if bracketed {
2990 render_citation_inline(node, out, None);
2991 return;
2992 }
2993 let next_sibling_pair = node.next_sibling_or_token().and_then(|el1| {
2997 let t = el1.as_token().cloned()?;
2998 if t.kind() != SyntaxKind::TEXT || !t.text().starts_with(' ') {
2999 return None;
3000 }
3001 let space_text = t.text().to_string();
3002 let link_el = t.next_sibling_or_token()?;
3003 let link = link_el.as_node().cloned()?;
3004 if link.kind() != SyntaxKind::LINK && link.kind() != SyntaxKind::UNRESOLVED_REFERENCE {
3009 return None;
3010 }
3011 let has_dest = link
3012 .children_with_tokens()
3013 .filter_map(|el| el.into_token())
3014 .any(|tok| tok.kind() == SyntaxKind::LINK_DEST_START);
3015 if has_dest {
3016 return None;
3017 }
3018 let link_text = link
3019 .children()
3020 .find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3021 .map(|tt| tt.text().to_string())
3022 .unwrap_or_default();
3023 Some((space_text, link_text))
3024 });
3025 if let Some((_space_text, locator_text)) = next_sibling_pair {
3026 iter.next();
3028 iter.next();
3029 render_citation_inline(node, out, Some(&locator_text));
3030 } else {
3031 render_citation_inline(node, out, None);
3032 }
3033}
3034
3035fn emit_latex_command_with_absorb<I>(
3042 node: &SyntaxNode,
3043 iter: &mut std::iter::Peekable<I>,
3044 out: &mut Vec<Inline>,
3045) where
3046 I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
3047{
3048 let mut content = node.text().to_string();
3049 let ends_in_letter = content
3050 .chars()
3051 .next_back()
3052 .is_some_and(|c| c.is_ascii_alphabetic());
3053 if ends_in_letter
3054 && let Some(NodeOrToken::Token(t)) = iter.peek()
3055 && t.kind() == SyntaxKind::TEXT
3056 {
3057 let text = t.text().to_string();
3058 let bytes = text.as_bytes();
3059 let mut absorbed = 0;
3060 while absorbed < bytes.len() && (bytes[absorbed] == b' ' || bytes[absorbed] == b'\t') {
3061 absorbed += 1;
3062 }
3063 if absorbed > 0 {
3064 content.push_str(&text[..absorbed]);
3065 out.push(Inline::RawInline("tex".to_string(), content));
3066 iter.next();
3067 let remainder = &text[absorbed..];
3068 if !remainder.is_empty() {
3069 push_text(remainder, out);
3070 }
3071 return;
3072 }
3073 }
3074 out.push(Inline::RawInline("tex".to_string(), content));
3075}
3076
3077fn push_inline_node(node: &SyntaxNode, out: &mut Vec<Inline>) {
3078 match node.kind() {
3079 SyntaxKind::LINK => render_link_inline(node, out),
3080 SyntaxKind::IMAGE_LINK => render_image_inline(node, out),
3081 SyntaxKind::CITATION => render_citation_inline(node, out, None),
3082 SyntaxKind::UNRESOLVED_REFERENCE => render_unresolved_reference_inline(node, out),
3090 _ => out.push(inline_from_node(node)),
3091 }
3092}
3093
3094fn render_unresolved_reference_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
3107 let is_image = node
3108 .children()
3109 .any(|c| c.kind() == SyntaxKind::IMAGE_LINK_START);
3110 let text_node = if is_image {
3111 node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT)
3112 } else {
3113 node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT)
3114 };
3115 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
3116
3117 let text_label = text_node
3118 .as_ref()
3119 .map(|n| n.text().to_string())
3120 .unwrap_or_default();
3121 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
3122 Some(rn) => {
3123 let inner = rn.text().to_string();
3124 if inner.is_empty() {
3125 (text_label.clone(), true, String::new())
3126 } else {
3127 (inner.clone(), true, inner)
3128 }
3129 }
3130 None => (text_label.clone(), false, String::new()),
3131 };
3132
3133 if !is_image && let Some(id) = lookup_heading_id(&label) {
3136 let url = format!("#{id}");
3137 let resolved_text_inlines = text_node
3138 .as_ref()
3139 .map(|n| coalesce_inlines(inlines_from(n)))
3140 .unwrap_or_default();
3141 out.push(Inline::Link(
3142 extract_attr_from_node(node),
3143 resolved_text_inlines,
3144 url,
3145 String::new(),
3146 ));
3147 return;
3148 }
3149
3150 let unresolved_text_inlines = text_node
3153 .as_ref()
3154 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
3155 .unwrap_or_default();
3156 let opener = if is_image { "![" } else { "[" };
3157 out.push(Inline::Str(opener.to_string()));
3158 out.extend(unresolved_text_inlines);
3159 let suffix = if has_second_brackets {
3160 format!("][{second_inner}]")
3161 } else {
3162 "]".to_string()
3163 };
3164 out.push(Inline::Str(suffix));
3165}
3166
3167fn render_citation_inline(
3176 node: &SyntaxNode,
3177 out: &mut Vec<Inline>,
3178 extra_suffix_text: Option<&str>,
3179) {
3180 let first_key = node
3182 .children_with_tokens()
3183 .filter_map(|el| el.into_token())
3184 .find(|t| t.kind() == SyntaxKind::CITATION_KEY)
3185 .map(|t| t.text().to_string())
3186 .unwrap_or_default();
3187 let example_resolution =
3188 REFS_CTX.with(|c| c.borrow().example_label_to_num.get(&first_key).copied());
3189 if let Some(n) = example_resolution {
3190 out.push(Inline::Str(n.to_string()));
3191 return;
3192 }
3193
3194 let bracketed = node
3195 .children_with_tokens()
3196 .filter_map(|el| el.into_token())
3197 .any(|t| t.kind() == SyntaxKind::LINK_START);
3198
3199 let mut builders: Vec<CitationBuilder> = Vec::new();
3200 let mut current: Option<CitationBuilder> = None;
3201 let mut pending_prefix = String::new();
3202 for el in node.children_with_tokens() {
3203 let token = match el {
3204 NodeOrToken::Token(t) => t,
3205 _ => continue,
3206 };
3207 match token.kind() {
3208 SyntaxKind::LINK_START | SyntaxKind::LINK_DEST => {}
3209 SyntaxKind::CITATION_BRACE_OPEN | SyntaxKind::CITATION_BRACE_CLOSE => {}
3210 SyntaxKind::CITATION_MARKER => {
3211 if let Some(c) = current.take() {
3212 builders.push(c);
3213 }
3214 let mode = if token.text() == "-@" {
3215 CitationMode::SuppressAuthor
3216 } else if bracketed {
3217 CitationMode::NormalCitation
3218 } else {
3219 CitationMode::AuthorInText
3220 };
3221 current = Some(CitationBuilder::new(
3222 std::mem::take(&mut pending_prefix),
3223 mode,
3224 ));
3225 }
3226 SyntaxKind::CITATION_KEY => {
3227 if let Some(c) = &mut current {
3228 c.id.push_str(token.text());
3229 }
3230 }
3231 SyntaxKind::CITATION_CONTENT => {
3232 if let Some(c) = &mut current {
3233 c.suffix_raw.push_str(token.text());
3234 } else {
3235 pending_prefix.push_str(token.text());
3236 }
3237 }
3238 SyntaxKind::CITATION_SEPARATOR => {
3239 if let Some(c) = current.take() {
3240 builders.push(c);
3241 }
3242 }
3243 _ => {}
3244 }
3245 }
3246 if let Some(c) = current.take() {
3247 builders.push(c);
3248 }
3249
3250 if let Some(extra) = extra_suffix_text
3254 && let Some(last) = builders.last_mut()
3255 {
3256 if !last.suffix_raw.is_empty() && !extra.starts_with(' ') {
3257 last.suffix_raw.push(' ');
3258 }
3259 last.suffix_raw.push_str(extra);
3260 }
3261
3262 let note_offset: u32 = node.text_range().start().into();
3263 let note_num = REFS_CTX
3264 .with(|c| {
3265 c.borrow()
3266 .cite_note_num_by_offset
3267 .get(¬e_offset)
3268 .copied()
3269 })
3270 .unwrap_or(1);
3271
3272 let projected: Vec<Citation> = builders
3273 .into_iter()
3274 .map(|b| b.into_citation(note_num))
3275 .collect();
3276
3277 let mut literal = node.text().to_string();
3279 if let Some(extra) = extra_suffix_text {
3280 literal.push(' ');
3281 literal.push('[');
3282 literal.push_str(extra);
3283 literal.push(']');
3284 }
3285 let text_inlines = literal_inlines(&literal);
3286
3287 out.push(Inline::Cite(projected, text_inlines));
3288}
3289
3290struct CitationBuilder {
3296 id: String,
3297 prefix_raw: String,
3298 suffix_raw: String,
3299 mode: CitationMode,
3300}
3301
3302impl CitationBuilder {
3303 fn new(prefix_raw: String, mode: CitationMode) -> Self {
3304 Self {
3305 id: String::new(),
3306 prefix_raw,
3307 suffix_raw: String::new(),
3308 mode,
3309 }
3310 }
3311
3312 fn into_citation(self, note_num: i64) -> Citation {
3313 let prefix = parse_cite_affix_inlines(self.prefix_raw.trim_end(), true);
3314 let suffix = parse_cite_affix_inlines(&self.suffix_raw, false);
3315 Citation {
3316 id: self.id,
3317 prefix,
3318 suffix,
3319 mode: self.mode,
3320 note_num,
3321 hash: 0,
3322 }
3323 }
3324}
3325
3326fn parse_cite_affix_inlines(raw: &str, is_prefix: bool) -> Vec<Inline> {
3338 if raw.is_empty() {
3339 return Vec::new();
3340 }
3341 let trimmed = if is_prefix { raw.trim_start() } else { raw };
3342 if trimmed.is_empty() {
3343 return Vec::new();
3344 }
3345 let leading_space = !is_prefix && trimmed.starts_with([' ', '\t']);
3346 let work = trimmed.trim_start_matches([' ', '\t']);
3347 if work.is_empty() {
3348 return if leading_space {
3349 vec![Inline::Space]
3350 } else {
3351 Vec::new()
3352 };
3353 }
3354 let wrapped = format!("Z {work}");
3355 let inlines = parse_cell_text_inlines(&wrapped);
3356 let mut coalesced = coalesce_inlines(inlines);
3357 if matches!(coalesced.first(), Some(Inline::Str(s)) if s == "Z") {
3359 coalesced.remove(0);
3360 if matches!(coalesced.first(), Some(Inline::Space)) {
3361 coalesced.remove(0);
3362 }
3363 }
3364 if leading_space {
3365 coalesced.insert(0, Inline::Space);
3366 }
3367 coalesced
3368}
3369
3370fn literal_inlines(text: &str) -> Vec<Inline> {
3376 let mut out: Vec<Inline> = Vec::new();
3377 let mut buf = String::new();
3378 for ch in text.chars() {
3379 match ch {
3380 ' ' | '\t' => {
3381 if !buf.is_empty() {
3382 out.push(Inline::Str(std::mem::take(&mut buf)));
3383 }
3384 if !matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
3385 out.push(Inline::Space);
3386 }
3387 }
3388 '\n' => {
3389 if !buf.is_empty() {
3390 out.push(Inline::Str(std::mem::take(&mut buf)));
3391 }
3392 if matches!(out.last(), Some(Inline::Space)) {
3393 out.pop();
3394 }
3395 out.push(Inline::SoftBreak);
3396 }
3397 _ => buf.push(ch),
3398 }
3399 }
3400 if !buf.is_empty() {
3401 out.push(Inline::Str(buf));
3402 }
3403 out
3404}
3405
3406fn push_token_inline(
3407 t: &rowan::SyntaxToken<crate::syntax::PanacheLanguage>,
3408 out: &mut Vec<Inline>,
3409) {
3410 match t.kind() {
3411 SyntaxKind::TEXT => push_text(t.text(), out),
3412 SyntaxKind::WHITESPACE => out.push(Inline::Space),
3413 SyntaxKind::NEWLINE => out.push(Inline::SoftBreak),
3414 SyntaxKind::HARD_LINE_BREAK => out.push(Inline::LineBreak),
3415 SyntaxKind::ESCAPED_CHAR => {
3416 let s: String = t.text().chars().skip(1).collect();
3418 out.push(Inline::Str(s));
3419 }
3420 SyntaxKind::NONBREAKING_SPACE => out.push(Inline::Str("\u{a0}".to_string())),
3421 _ => {}
3424 }
3425}
3426
3427fn push_text(text: &str, out: &mut Vec<Inline>) {
3428 let mut buf = String::new();
3429 for ch in text.chars() {
3430 if ch == ' ' || ch == '\t' {
3431 if !buf.is_empty() {
3432 out.push(Inline::Str(std::mem::take(&mut buf)));
3433 }
3434 out.push(Inline::Space);
3435 } else if ch == '\n' {
3436 if !buf.is_empty() {
3437 out.push(Inline::Str(std::mem::take(&mut buf)));
3438 }
3439 out.push(Inline::SoftBreak);
3440 } else {
3441 buf.push(ch);
3442 }
3443 }
3444 if !buf.is_empty() {
3445 out.push(Inline::Str(buf));
3446 }
3447}
3448
3449fn inline_from_node(node: &SyntaxNode) -> Inline {
3450 match node.kind() {
3451 SyntaxKind::EMPHASIS => {
3452 Inline::Emph(coalesce_inlines_keep_edges(inlines_from_marked(node)))
3453 }
3454 SyntaxKind::STRONG => {
3455 Inline::Strong(coalesce_inlines_keep_edges(inlines_from_marked(node)))
3456 }
3457 SyntaxKind::STRIKEOUT => {
3458 Inline::Strikeout(coalesce_inlines_keep_edges(inlines_from_marked(node)))
3459 }
3460 SyntaxKind::SUPERSCRIPT => {
3461 Inline::Superscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
3462 }
3463 SyntaxKind::SUBSCRIPT => {
3464 Inline::Subscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
3465 }
3466 SyntaxKind::INLINE_CODE => {
3467 let content: String = node
3468 .children_with_tokens()
3469 .filter_map(|el| el.into_token())
3470 .filter(|t| t.kind() == SyntaxKind::INLINE_CODE_CONTENT)
3471 .map(|t| t.text().to_string())
3472 .collect();
3473 Inline::Code(
3474 extract_attr_from_node(node),
3475 strip_inline_code_padding(&content),
3476 )
3477 }
3478 SyntaxKind::LINK | SyntaxKind::IMAGE_LINK | SyntaxKind::UNRESOLVED_REFERENCE => {
3479 Inline::Unsupported(format!("{:?}", node.kind()))
3486 }
3487 SyntaxKind::AUTO_LINK => autolink_inline(node),
3488 SyntaxKind::INLINE_MATH => math_inline(node, "InlineMath"),
3489 SyntaxKind::DISPLAY_MATH => math_inline(node, "DisplayMath"),
3490 SyntaxKind::LATEX_COMMAND => latex_command_inline(node),
3491 SyntaxKind::BRACKETED_SPAN => bracketed_span_inline(node),
3492 SyntaxKind::INLINE_HTML => Inline::RawInline("html".to_string(), node.text().to_string()),
3493 SyntaxKind::FOOTNOTE_REFERENCE => footnote_reference_inline(node),
3494 SyntaxKind::INLINE_FOOTNOTE => inline_footnote_inline(node),
3495 other => Inline::Unsupported(format!("{other:?}")),
3496 }
3497}
3498
3499fn inlines_from_marked(parent: &SyntaxNode) -> Vec<Inline> {
3503 let mut out = Vec::new();
3504 let mut iter = parent.children_with_tokens().peekable();
3505 while let Some(el) = iter.next() {
3506 match el {
3507 NodeOrToken::Token(t) => match t.kind() {
3508 SyntaxKind::EMPHASIS_MARKER
3509 | SyntaxKind::STRONG_MARKER
3510 | SyntaxKind::STRIKEOUT_MARKER
3511 | SyntaxKind::SUPERSCRIPT_MARKER
3512 | SyntaxKind::SUBSCRIPT_MARKER
3513 | SyntaxKind::MARK_MARKER => {}
3514 _ => push_token_inline(&t, &mut out),
3515 },
3516 NodeOrToken::Node(n) => match n.kind() {
3517 SyntaxKind::EMPHASIS_MARKER
3518 | SyntaxKind::STRONG_MARKER
3519 | SyntaxKind::STRIKEOUT_MARKER
3520 | SyntaxKind::SUPERSCRIPT_MARKER
3521 | SyntaxKind::SUBSCRIPT_MARKER
3522 | SyntaxKind::MARK_MARKER => {}
3523 _ if n.kind() == SyntaxKind::LATEX_COMMAND => {
3524 emit_latex_command_with_absorb(&n, &mut iter, &mut out);
3525 }
3526 _ => push_inline_node(&n, &mut out),
3527 },
3528 }
3529 }
3530 out
3531}
3532
3533fn render_link_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
3534 let text_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT);
3535 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
3536 let has_dest_paren = node
3537 .children_with_tokens()
3538 .any(|el| matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::LINK_DEST_START));
3539
3540 if has_dest_paren {
3541 let text = text_node
3542 .as_ref()
3543 .map(|n| coalesce_inlines(inlines_from(n)))
3544 .unwrap_or_default();
3545 let (url, title) = dest_node
3546 .as_ref()
3547 .map(parse_link_dest)
3548 .unwrap_or((String::new(), String::new()));
3549 out.push(Inline::Link(extract_attr_from_node(node), text, url, title));
3550 return;
3551 }
3552
3553 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
3556 let resolved_text_inlines = text_node
3557 .as_ref()
3558 .map(|n| coalesce_inlines(inlines_from(n)))
3559 .unwrap_or_default();
3560 let text_label = text_node
3561 .as_ref()
3562 .map(|n| n.text().to_string())
3563 .unwrap_or_default();
3564
3565 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
3566 Some(rn) => {
3567 let inner = rn.text().to_string();
3568 if inner.is_empty() {
3569 (text_label.clone(), true, String::new())
3570 } else {
3571 (inner.clone(), true, inner)
3572 }
3573 }
3574 None => (text_label.clone(), false, String::new()),
3575 };
3576
3577 if let Some((url, title)) = lookup_ref(&label) {
3578 out.push(Inline::Link(
3579 extract_attr_from_node(node),
3580 resolved_text_inlines,
3581 url,
3582 title,
3583 ));
3584 return;
3585 }
3586
3587 if let Some(id) = lookup_heading_id(&label) {
3588 let url = format!("#{id}");
3589 out.push(Inline::Link(
3590 extract_attr_from_node(node),
3591 resolved_text_inlines,
3592 url,
3593 String::new(),
3594 ));
3595 return;
3596 }
3597
3598 let unresolved_text_inlines = text_node
3607 .as_ref()
3608 .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
3609 .unwrap_or_default();
3610 out.push(Inline::Str("[".to_string()));
3611 out.extend(unresolved_text_inlines);
3612 let suffix = if has_second_brackets {
3613 format!("][{second_inner}]")
3614 } else {
3615 "]".to_string()
3616 };
3617 out.push(Inline::Str(suffix));
3618}
3619
3620fn render_image_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
3621 let alt_node = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
3622 let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
3623 let has_dest_paren = node.children_with_tokens().any(|el| {
3624 matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::IMAGE_DEST_START
3625 || t.kind() == SyntaxKind::LINK_DEST_START)
3626 });
3627
3628 if has_dest_paren {
3629 let alt = alt_node
3630 .as_ref()
3631 .map(|n| coalesce_inlines(inlines_from(n)))
3632 .unwrap_or_default();
3633 let (url, title) = dest_node
3634 .as_ref()
3635 .map(parse_link_dest)
3636 .unwrap_or((String::new(), String::new()));
3637 out.push(Inline::Image(extract_attr_from_node(node), alt, url, title));
3638 return;
3639 }
3640
3641 let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
3642 let alt_inlines = alt_node
3643 .as_ref()
3644 .map(|n| coalesce_inlines(inlines_from(n)))
3645 .unwrap_or_default();
3646 let alt_label = alt_node
3647 .as_ref()
3648 .map(|n| n.text().to_string())
3649 .unwrap_or_default();
3650
3651 let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
3652 Some(rn) => {
3653 let inner = rn.text().to_string();
3654 if inner.is_empty() {
3655 (alt_label.clone(), true, String::new())
3656 } else {
3657 (inner.clone(), true, inner)
3658 }
3659 }
3660 None => (alt_label.clone(), false, String::new()),
3661 };
3662
3663 if let Some((url, title)) = lookup_ref(&label) {
3664 out.push(Inline::Image(
3665 extract_attr_from_node(node),
3666 alt_inlines,
3667 url,
3668 title,
3669 ));
3670 return;
3671 }
3672
3673 if let Some(id) = lookup_heading_id(&label) {
3674 let url = format!("#{id}");
3675 out.push(Inline::Image(
3676 extract_attr_from_node(node),
3677 alt_inlines,
3678 url,
3679 String::new(),
3680 ));
3681 return;
3682 }
3683
3684 out.push(Inline::Str("![".to_string()));
3685 out.extend(alt_inlines);
3686 let suffix = if has_second_brackets {
3687 format!("][{second_inner}]")
3688 } else {
3689 "]".to_string()
3690 };
3691 out.push(Inline::Str(suffix));
3692}
3693
/// Normalizes the content of an inline code span: every newline becomes a
/// single space, then leading and trailing whitespace is removed. Interior
/// runs of spaces are left untouched.
fn strip_inline_code_padding(s: &str) -> String {
    s.replace('\n', " ").trim().to_string()
}
3702
3703fn math_inline(node: &SyntaxNode, kind: &'static str) -> Inline {
3704 let mut content = String::new();
3705 for el in node.children_with_tokens() {
3706 if let NodeOrToken::Token(t) = el {
3707 match t.kind() {
3708 SyntaxKind::INLINE_MATH_MARKER | SyntaxKind::DISPLAY_MATH_MARKER => {}
3709 _ => content.push_str(t.text()),
3710 }
3711 }
3712 }
3713 Inline::Math(kind, content)
3714}
3715
3716fn autolink_inline(node: &SyntaxNode) -> Inline {
3717 let mut url = String::new();
3718 for el in node.children_with_tokens() {
3719 if let NodeOrToken::Token(t) = el
3720 && t.kind() == SyntaxKind::TEXT
3721 {
3722 url.push_str(t.text());
3723 }
3724 }
3725 let is_email = !url.contains("://") && !url.starts_with("mailto:") && url.contains('@');
3728 if is_email {
3729 let attr = Attr {
3730 id: String::new(),
3731 classes: vec!["email".to_string()],
3732 kvs: Vec::new(),
3733 };
3734 let dest = format!("mailto:{url}");
3735 return Inline::Link(attr, vec![Inline::Str(url)], dest, String::new());
3736 }
3737 if !is_known_uri_scheme(&url) {
3741 return Inline::RawInline("html".to_string(), node.text().to_string());
3742 }
3743 let attr = Attr {
3744 id: String::new(),
3745 classes: vec!["uri".to_string()],
3746 kvs: Vec::new(),
3747 };
3748 Inline::Link(attr, vec![Inline::Str(url.clone())], url, String::new())
3749}
3750
3751fn is_known_uri_scheme(url: &str) -> bool {
3754 let scheme_end = url.find(':');
3755 let Some(end) = scheme_end else {
3756 return false;
3757 };
3758 let scheme = url[..end].to_ascii_lowercase();
3759 PANDOC_KNOWN_SCHEMES.binary_search(&scheme.as_str()).is_ok()
3760}
3761
/// Lowercase URI schemes accepted for autolinks, consulted by
/// `is_known_uri_scheme`.
///
/// The table MUST stay sorted in ascending byte order: the lookup uses
/// `binary_search`, which silently misses entries in an unsorted slice. Note
/// that `-` and `.` sort before digits and letters (`ms-word` < `msnim`).
/// Presumably mirrors the scheme list built into pandoc; confirm against
/// upstream before adding or removing entries.
#[rustfmt::skip]
const PANDOC_KNOWN_SCHEMES: &[&str] = &[
    "aaa", "aaas", "about", "acap", "acct", "acr",
    "adiumxtra", "afp", "afs", "aim", "appdata", "apt",
    "attachment", "aw", "barion", "beshare", "bitcoin", "blob",
    "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension",
    "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid",
    "cvs", "data", "dav", "dict", "dis", "dlna-playcontainer",
    "dlna-playsingle", "dns", "dntp", "doi", "dtn", "dvb",
    "ed2k", "example", "facetime", "fax", "feed", "feedready",
    "file", "filesystem", "finger", "fish", "ftp", "gemini",
    "geo", "gg", "git", "gizmoproject", "go", "gopher",
    "graph", "gtalk", "h323", "ham", "hcp", "http",
    "https", "hxxp", "hxxps", "hydrazone", "iax", "icap",
    "icon", "im", "imap", "info", "iotdisco", "ipn",
    "ipp", "ipps", "irc", "irc6", "ircs", "iris",
    "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "isbn", "isostore",
    "itms", "jabber", "jar", "javascript", "jms", "keyparc",
    "lastfm", "ldap", "ldaps", "lvlt", "magnet", "mailserver",
    "mailto", "maps", "market", "message", "mid", "mms",
    "modem", "mongodb", "moz", "ms-access", "ms-browser-extension", "ms-drive-to",
    "ms-enrollment", "ms-excel", "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath",
    "ms-media-stream-id", "ms-officeapp", "ms-powerpoint", "ms-project", "ms-publisher", "ms-search-repair",
    "ms-secondary-screen-controller", "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode", "ms-settings-bluetooth", "ms-settings-camera",
    "ms-settings-cellular", "ms-settings-cloudstorage", "ms-settings-connectabledevices", "ms-settings-displays-topology", "ms-settings-emailandaccounts", "ms-settings-language",
    "ms-settings-location", "ms-settings-lock", "ms-settings-nfctransactions", "ms-settings-notifications", "ms-settings-power", "ms-settings-privacy",
    "ms-settings-proximity", "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace", "ms-spd", "ms-sttoverlay",
    "ms-transit-to", "ms-virtualtouchpad", "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd",
    "ms-word", "msnim", "msrp", "msrps", "mtqp", "mumble",
    "mupdate", "mvn", "news", "nfs", "ni", "nih",
    "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd",
    "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform",
    "pmid", "pop", "pres", "prospero", "proxy", "psyc",
    "pwid", "qb", "query", "redis", "rediss", "reload",
    "res", "resource", "rmi", "rsync", "rtmfp", "rtmp",
    "rtsp", "rtsps", "rtspu", "secondlife", "service", "session",
    "sftp", "sgn", "shttp", "sieve", "sip", "sips",
    "skype", "smb", "sms", "smtp", "snews", "snmp",
    "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam",
    "stun", "stuns", "submit", "svn", "tag", "teamspeak",
    "tel", "teliaeid", "telnet", "tftp", "things", "thismessage",
    "tip", "tn3270", "tool", "turn", "turns", "tv",
    "udp", "unreal", "urn", "ut2004", "v-event", "vemmi",
    "ventrilo", "videotex", "view-source", "vnc", "wais", "webcal",
    "wpid", "ws", "wss", "wtai", "wyciwyg", "xcon",
    "xcon-userid", "xfire", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri",
    "ymsgr", "z39.50", "z39.50r", "z39.50s",
];
3812
3813fn footnote_reference_inline(node: &SyntaxNode) -> Inline {
3814 let Some(label) = footnote_label(node) else {
3815 return Inline::Unsupported("FOOTNOTE_REFERENCE".to_string());
3816 };
3817 let blocks = REFS_CTX.with(|c| {
3818 c.borrow()
3819 .footnotes
3820 .get(&label)
3821 .map(|bs| bs.iter().map(clone_block).collect::<Vec<_>>())
3822 });
3823 match blocks {
3824 Some(bs) => Inline::Note(bs),
3825 None => Inline::Str(node.text().to_string()),
3828 }
3829}
3830
3831fn inline_footnote_inline(node: &SyntaxNode) -> Inline {
3832 let inlines = coalesce_inlines(inlines_from(node));
3833 if inlines.is_empty() {
3834 Inline::Note(Vec::new())
3835 } else {
3836 Inline::Note(vec![Block::Para(inlines)])
3837 }
3838}
3839
3840fn parse_link_dest(node: &SyntaxNode) -> (String, String) {
3841 let raw = node.text().to_string();
3845 let trimmed = raw.trim();
3846 if let Some(rest) = trimmed.strip_prefix('<')
3849 && let Some(end) = rest.find('>')
3850 {
3851 let url = &rest[..end];
3852 let after = rest[end + 1..].trim();
3853 let title = parse_dest_title(after);
3854 return (escape_link_dest(url), title);
3855 }
3856 let bytes = trimmed.as_bytes();
3860 let mut url_end = trimmed.len();
3861 let mut i = 0;
3862 while i < bytes.len() {
3863 if matches!(bytes[i], b' ' | b'\t' | b'\n') {
3864 let mut j = i;
3865 while j < bytes.len() && matches!(bytes[j], b' ' | b'\t' | b'\n') {
3866 j += 1;
3867 }
3868 if j < bytes.len() && matches!(bytes[j], b'"' | b'\'' | b'(') {
3869 url_end = i;
3870 break;
3871 }
3872 i = j;
3873 } else {
3874 i += 1;
3875 }
3876 }
3877 let url_raw = &trimmed[..url_end];
3878 let title = parse_dest_title(trimmed[url_end..].trim());
3879 (escape_link_dest(url_raw), title)
3880}
3881
/// Percent-encodes the characters of a link destination that must not appear
/// raw in a URL: any Unicode whitespace and the characters
/// `<`, `>`, `|`, `"`, `{`, `}`, `[`, `]`, `^` and `` ` ``.
///
/// Each escaped character is written as one `%XX` group (uppercase hex) per
/// UTF-8 byte; all other characters, including existing `%` sequences, are
/// copied unchanged.
fn escape_link_dest(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        let needs_escape = ch.is_whitespace()
            || matches!(
                ch,
                '<' | '>' | '|' | '"' | '{' | '}' | '[' | ']' | '^' | '`'
            );
        if !needs_escape {
            out.push(ch);
            continue;
        }
        // Write the hex digits directly instead of allocating a temporary
        // `String` through `format!` for every escaped byte.
        for &byte in ch.encode_utf8(&mut utf8).as_bytes() {
            out.push('%');
            out.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            out.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    out
}
3904
/// Extracts a link title from `s`, which must begin with its opening delimiter.
///
/// Supported delimiters are `"…"`, `'…'` and `(…)`. The title is the text between the
/// opener and the *last* occurrence of the matching closer; anything after that closer
/// is ignored. An empty string is returned when `s` is empty, starts with any other
/// character, or contains no closer after the opener.
fn parse_dest_title(s: &str) -> String {
    let closer = match s.as_bytes().first() {
        Some(b'"') => '"',
        Some(b'\'') => '\'',
        Some(b'(') => ')',
        _ => return String::new(),
    };
    // The opener is a single ASCII byte, so slicing at 1 is on a char boundary.
    let body = &s[1..];
    body.rfind(closer)
        .map(|end| body[..end].to_string())
        .unwrap_or_default()
}
3924
/// Coalesces `input` through `coalesce_inlines_inner` with `trim_edges = true`, i.e.
/// leading and trailing `Space`/`SoftBreak` inlines are removed from the result.
fn coalesce_inlines(input: Vec<Inline>) -> Vec<Inline> {
    coalesce_inlines_inner(input, true)
}
3930
/// Coalesces `input` through `coalesce_inlines_inner` with `trim_edges = false`, i.e.
/// any leading or trailing `Space`/`SoftBreak` inlines are kept in the result.
fn coalesce_inlines_keep_edges(input: Vec<Inline>) -> Vec<Inline> {
    coalesce_inlines_inner(input, false)
}
3938
3939fn coalesce_inlines_inner(input: Vec<Inline>, trim_edges: bool) -> Vec<Inline> {
3940 let mut out: Vec<Inline> = Vec::with_capacity(input.len());
3941 for inline in input {
3942 if let Inline::Str(s) = inline {
3943 if let Some(Inline::Str(prev)) = out.last_mut() {
3944 prev.push_str(&s);
3945 } else {
3946 out.push(Inline::Str(s));
3947 }
3948 } else if let Inline::Space = inline {
3949 if matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
3952 continue;
3953 }
3954 out.push(Inline::Space);
3955 } else if let Inline::SoftBreak = inline {
3956 if matches!(out.last(), Some(Inline::Space)) {
3959 out.pop();
3960 }
3961 out.push(Inline::SoftBreak);
3962 } else {
3963 out.push(inline);
3964 }
3965 }
3966 if trim_edges {
3967 while matches!(out.first(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
3970 out.remove(0);
3971 }
3972 while matches!(out.last(), Some(Inline::Space) | Some(Inline::SoftBreak)) {
3973 out.pop();
3974 }
3975 }
3976 for inline in out.iter_mut() {
3980 if let Inline::Str(s) = inline {
3981 let mut t = smart_intraword_apostrophe(s);
3982 t = smart_dashes_and_ellipsis(&t);
3983 *s = t;
3984 }
3985 }
3986 let out = smart_quote_pairs(out);
3987 apply_abbreviations(out)
3988}
3989
/// Abbreviations (each ending in a period) that `matches_abbreviation_suffix` looks for
/// at the end of a word.
const PANDOC_ABBREVIATIONS: &[&str] = &[
    "Apr.", "Aug.", "Bros.", "Capt.", "Co.", "Corp.", "Dec.", "Dr.", "Feb.", "Fr.", "Gen.", "Gov.",
    "Hon.", "Inc.", "Jan.", "Jr.", "Jul.", "Jun.", "Ltd.", "M.A.", "M.D.", "Mar.", "Mr.", "Mrs.",
    "Ms.", "No.", "Nov.", "Oct.", "Ph.D.", "Pres.", "Prof.", "Rep.", "Rev.", "Sen.", "Sep.",
    "Sept.", "Sgt.", "Sr.", "St.", "aet.", "aetat.", "al.", "bk.", "c.", "cf.", "ch.", "chap.",
    "chs.", "col.", "cp.", "d.", "e.g.", "ed.", "eds.", "esp.", "f.", "fasc.", "ff.", "fig.",
    "fl.", "fol.", "fols.", "i.e.", "ill.", "incl.", "n.", "n.b.", "nn.", "p.", "pp.", "pt.",
    "q.v.", "s.v.", "s.vv.", "saec.", "sec.", "univ.", "viz.", "vol.", "vs.",
];

/// Reports whether `s` ends with one of the `PANDOC_ABBREVIATIONS`.
///
/// The abbreviation must either be the whole string or be preceded by a character that
/// is neither alphanumeric nor a period, so `"(e.g."` matches while `"xMr."` and
/// `"a.Mr."` do not.
fn matches_abbreviation_suffix(s: &str) -> bool {
    PANDOC_ABBREVIATIONS.iter().any(|abbr| {
        s.strip_suffix(abbr).is_some_and(|before| {
            before
                .chars()
                .next_back()
                .map_or(true, |c| !(c.is_alphanumeric() || c == '.'))
        })
    })
}
4019
4020fn apply_abbreviations(inlines: Vec<Inline>) -> Vec<Inline> {
4030 let inlines: Vec<Inline> = inlines
4031 .into_iter()
4032 .map(|inline| match inline {
4033 Inline::Quoted(kind, content) => Inline::Quoted(kind, apply_abbreviations(content)),
4034 other => other,
4035 })
4036 .collect();
4037 let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
4038 let mut iter = inlines.into_iter().peekable();
4039 while let Some(inline) = iter.next() {
4040 if let Inline::Str(ref s) = inline
4041 && matches_abbreviation_suffix(s)
4042 && matches!(iter.peek(), Some(Inline::Space))
4043 {
4044 iter.next();
4046 let Inline::Str(mut new_s) = inline else {
4047 unreachable!()
4048 };
4049 new_s.push('\u{a0}');
4050 if let Some(Inline::Str(_)) = iter.peek()
4052 && let Some(Inline::Str(next_s)) = iter.next()
4053 {
4054 new_s.push_str(&next_s);
4055 }
4056 out.push(Inline::Str(new_s));
4057 } else {
4058 out.push(inline);
4059 }
4060 }
4061 out
4062}
4063
/// Pairs straight quotes in a flat inline sequence into `Inline::Quoted` nodes.
///
/// A `Str` that starts with `"` or `'` opens a quotation when it sits at a boundary
/// (see `is_boundary`), "attaches" to what follows, and `find_matching_close` locates a
/// closing `Str`. Everything from the opener through the closer is then replaced by a
/// single `Quoted("DoubleQuote" | "SingleQuote", content)`, with the quote characters
/// themselves stripped. Inlines that take part in no pair are copied through.
///
/// The inlines placed inside `content` are copied as-is; this pass does not recurse
/// into them.
fn smart_quote_pairs(inlines: Vec<Inline>) -> Vec<Inline> {
    /// True when a quote following `prev` may open a quotation: at the very start,
    /// after `Space`/`SoftBreak`/`LineBreak`, or after a `Str` whose last character is
    /// not alphanumeric. An empty `Str` and every other inline kind give `false`.
    fn is_boundary(prev: Option<&Inline>) -> bool {
        match prev {
            None => true,
            Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
            Some(Inline::Str(s)) => s.chars().last().is_some_and(|c| !c.is_alphanumeric()),
            _ => false,
        }
    }
    let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
    let n = inlines.len();
    // consumed[k] is set once inlines[k] has been emitted, either directly or as part
    // of a Quoted node.
    let mut consumed = vec![false; n];
    for i in 0..n {
        if consumed[i] {
            continue;
        }
        // Only Str inlines can open a quotation; everything else is copied through.
        let Inline::Str(s) = &inlines[i] else {
            out.push(clone_inline(&inlines[i]));
            consumed[i] = true;
            continue;
        };
        let first = s.chars().next();
        let quote = match first {
            Some('"') => Some('"'),
            Some('\'') => Some('\''),
            _ => None,
        };
        // The boundary test looks at what has already been emitted, not at
        // inlines[i - 1].
        let prev_is_boundary = is_boundary(out.last());
        let str_has_more = s.chars().count() > 1;
        let next_char_is_word = s.chars().nth(1).is_some_and(|c| !c.is_whitespace());
        let next_is_markup_atom = matches!(
            inlines.get(i + 1),
            Some(
                Inline::Emph(_)
                    | Inline::Strong(_)
                    | Inline::Strikeout(_)
                    | Inline::Superscript(_)
                    | Inline::Subscript(_)
                    | Inline::Code(_, _)
            )
        );
        // The opener must be glued to content: either more non-whitespace text in the
        // same Str, or a lone quote character directly followed by inline markup.
        let attaches =
            (str_has_more && next_char_is_word) || (!str_has_more && next_is_markup_atom);
        if let Some(q) = quote
            && prev_is_boundary
            && attaches
        {
            if let Some(close_idx) = find_matching_close(&inlines, i, q, &consumed) {
                let kind = if q == '"' {
                    "DoubleQuote"
                } else {
                    "SingleQuote"
                };
                let mut content: Vec<Inline> = Vec::new();
                for j in i..=close_idx {
                    if consumed[j] {
                        continue;
                    }
                    let inline = &inlines[j];
                    if j == i && j == close_idx {
                        // Opener and closer live in the same Str: drop both ends.
                        if let Inline::Str(s) = inline {
                            let mut chars: Vec<char> = s.chars().collect();
                            if chars.len() >= 2 {
                                chars.remove(0);
                                chars.pop();
                            }
                            let stripped: String = chars.into_iter().collect();
                            if !stripped.is_empty() {
                                content.push(Inline::Str(stripped));
                            }
                        }
                    } else if j == i {
                        // Opening Str: drop the leading quote character.
                        if let Inline::Str(s) = inline {
                            let stripped: String = s.chars().skip(1).collect();
                            if !stripped.is_empty() {
                                content.push(Inline::Str(stripped));
                            }
                        }
                    } else if j == close_idx {
                        // Closing Str: drop the trailing quote character.
                        if let Inline::Str(s) = inline {
                            let mut stripped: String = s.chars().collect();
                            stripped.pop();
                            if !stripped.is_empty() {
                                content.push(Inline::Str(stripped));
                            }
                        }
                    } else {
                        content.push(clone_inline(inline));
                    }
                    consumed[j] = true;
                }
                out.push(Inline::Quoted(kind, content));
                continue;
            }
        }
        // No pair was formed: emit the Str unchanged.
        out.push(clone_inline(&inlines[i]));
        consumed[i] = true;
    }
    out
}
4182
4183fn find_matching_close(
4184 inlines: &[Inline],
4185 open_idx: usize,
4186 quote: char,
4187 consumed: &[bool],
4188) -> Option<usize> {
4189 if let Inline::Str(s) = &inlines[open_idx]
4191 && s.chars().count() >= 3
4192 && s.ends_with(quote)
4193 {
4194 let next = inlines.get(open_idx + 1);
4196 let after_is_boundary = match next {
4197 None => true,
4198 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4199 Some(Inline::Str(s)) => s.chars().next().is_some_and(|c| !c.is_alphanumeric()),
4200 _ => false,
4201 };
4202 if after_is_boundary {
4203 return Some(open_idx);
4204 }
4205 }
4206 let n = inlines.len();
4209 let mut j = open_idx + 1;
4210 while j < n {
4211 if consumed[j] {
4212 return None;
4213 }
4214 match &inlines[j] {
4215 Inline::Str(s) => {
4216 if s.ends_with(quote) {
4217 let next = inlines.get(j + 1);
4218 let after_is_boundary = match next {
4219 None => true,
4220 Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
4221 Some(Inline::Str(s)) => {
4222 s.chars().next().is_some_and(|c| !c.is_alphanumeric())
4223 }
4224 _ => false,
4225 };
4226 if after_is_boundary {
4227 return Some(j);
4228 }
4229 }
4230 }
4231 Inline::Space | Inline::SoftBreak | Inline::LineBreak => {}
4232 _ => {}
4234 }
4235 j += 1;
4236 if j - open_idx > 32 {
4238 return None;
4239 }
4240 }
4241 None
4242}
4243
/// Builds a deep copy of `inline` by hand, variant by variant.
///
/// Child inline lists are copied recursively through `clone_inline`, the blocks of a
/// `Note` through `clone_block`, and each `Citation` of a `Cite` field by field.
fn clone_inline(inline: &Inline) -> Inline {
    match inline {
        Inline::Str(s) => Inline::Str(s.clone()),
        Inline::Space => Inline::Space,
        Inline::SoftBreak => Inline::SoftBreak,
        Inline::LineBreak => Inline::LineBreak,
        Inline::Emph(c) => Inline::Emph(c.iter().map(clone_inline).collect()),
        Inline::Strong(c) => Inline::Strong(c.iter().map(clone_inline).collect()),
        Inline::Strikeout(c) => Inline::Strikeout(c.iter().map(clone_inline).collect()),
        Inline::Superscript(c) => Inline::Superscript(c.iter().map(clone_inline).collect()),
        Inline::Subscript(c) => Inline::Subscript(c.iter().map(clone_inline).collect()),
        Inline::Code(a, s) => Inline::Code(a.clone(), s.clone()),
        Inline::Link(a, t, u, ti) => Inline::Link(
            a.clone(),
            t.iter().map(clone_inline).collect(),
            u.clone(),
            ti.clone(),
        ),
        Inline::Image(a, t, u, ti) => Inline::Image(
            a.clone(),
            t.iter().map(clone_inline).collect(),
            u.clone(),
            ti.clone(),
        ),
        // The kind tag of Math and Quoted is passed along as-is rather than cloned.
        Inline::Math(k, c) => Inline::Math(k, c.clone()),
        Inline::Span(a, c) => Inline::Span(a.clone(), c.iter().map(clone_inline).collect()),
        Inline::RawInline(f, c) => Inline::RawInline(f.clone(), c.clone()),
        Inline::Quoted(k, c) => Inline::Quoted(k, c.iter().map(clone_inline).collect()),
        Inline::Note(blocks) => Inline::Note(blocks.iter().map(clone_block).collect()),
        Inline::Cite(citations, text) => Inline::Cite(
            citations
                .iter()
                .map(|c| Citation {
                    id: c.id.clone(),
                    prefix: c.prefix.iter().map(clone_inline).collect(),
                    suffix: c.suffix.iter().map(clone_inline).collect(),
                    mode: c.mode,
                    note_num: c.note_num,
                    hash: c.hash,
                })
                .collect(),
            text.iter().map(clone_inline).collect(),
        ),
        Inline::Unsupported(s) => Inline::Unsupported(s.clone()),
    }
}
4290
/// Builds a deep copy of `b` by hand, variant by variant.
///
/// Nested blocks are copied recursively through `clone_block` and inline content
/// through `clone_inline`.
///
/// NOTE: `Block::Table` is *not* copied. It is replaced by
/// `Block::Unsupported("Table")`, so a table loses its content when cloned.
fn clone_block(b: &Block) -> Block {
    match b {
        Block::Para(c) => Block::Para(c.iter().map(clone_inline).collect()),
        Block::Plain(c) => Block::Plain(c.iter().map(clone_inline).collect()),
        Block::Header(lvl, a, c) => {
            Block::Header(*lvl, a.clone(), c.iter().map(clone_inline).collect())
        }
        Block::BlockQuote(blocks) => Block::BlockQuote(blocks.iter().map(clone_block).collect()),
        Block::CodeBlock(a, s) => Block::CodeBlock(a.clone(), s.clone()),
        Block::HorizontalRule => Block::HorizontalRule,
        Block::BulletList(items) => Block::BulletList(
            items
                .iter()
                .map(|item| item.iter().map(clone_block).collect())
                .collect(),
        ),
        // Style and delimiter tags are passed along as-is rather than cloned.
        Block::OrderedList(start, style, delim, items) => Block::OrderedList(
            *start,
            style,
            delim,
            items
                .iter()
                .map(|item| item.iter().map(clone_block).collect())
                .collect(),
        ),
        Block::RawBlock(f, c) => Block::RawBlock(f.clone(), c.clone()),
        // Lossy: the table data is dropped (see the function-level note).
        Block::Table(_) => Block::Unsupported("Table".to_string()),
        Block::Div(a, blocks) => Block::Div(a.clone(), blocks.iter().map(clone_block).collect()),
        Block::LineBlock(lines) => Block::LineBlock(
            lines
                .iter()
                .map(|line| line.iter().map(clone_inline).collect())
                .collect(),
        ),
        Block::DefinitionList(items) => Block::DefinitionList(
            items
                .iter()
                .map(|(term, defs)| {
                    (
                        term.iter().map(clone_inline).collect(),
                        defs.iter()
                            .map(|d| d.iter().map(clone_block).collect())
                            .collect(),
                    )
                })
                .collect(),
        ),
        Block::Figure(a, caption, body) => Block::Figure(
            a.clone(),
            caption.iter().map(clone_block).collect(),
            body.iter().map(clone_block).collect(),
        ),
        Block::Unsupported(s) => Block::Unsupported(s.clone()),
    }
}
4346
/// Rewrites ASCII dash and dot runs into typographic characters.
///
/// Scanning left to right, `---` becomes an em dash (U+2014), `--` an en dash
/// (U+2013) and `...` an ellipsis (U+2026); the longest dash form is tried first, so
/// `----` yields an em dash followed by `-`. All other characters are copied through.
fn smart_dashes_and_ellipsis(s: &str) -> String {
    if !s.contains(['-', '.']) {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(head) = rest.chars().next() {
        let (emit, eaten) = if rest.starts_with("---") {
            ('\u{2014}', 3)
        } else if rest.starts_with("--") {
            ('\u{2013}', 2)
        } else if rest.starts_with("...") {
            ('\u{2026}', 3)
        } else {
            (head, head.len_utf8())
        };
        out.push(emit);
        rest = &rest[eaten..];
    }
    out
}

/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// Bytes below `0xC0` (ASCII, and also continuation bytes) report 1.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0xbf => 1,
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
4392
/// Replaces every `'` that sits directly between two word characters with a right
/// single quotation mark (U+2019), e.g. `don't` → `don’t`.
///
/// Apostrophes at the start or end of the string, or next to a non-word character,
/// are left untouched.
fn smart_intraword_apostrophe(s: &str) -> String {
    if !s.contains('\'') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut before: Option<char> = None;
    let mut rest = s.chars().peekable();
    while let Some(c) = rest.next() {
        let between_words = c == '\''
            && before.is_some_and(is_word_char)
            && rest.peek().copied().is_some_and(is_word_char);
        out.push(if between_words { '\u{2019}' } else { c });
        before = Some(c);
    }
    out
}

/// A word character, for apostrophe handling, is any Unicode alphanumeric character.
fn is_word_char(c: char) -> bool {
    char::is_alphanumeric(c)
}
4418
4419fn inlines_to_plaintext(inlines: &[Inline]) -> String {
4420 let mut s = String::new();
4421 for i in inlines {
4422 match i {
4423 Inline::Str(t) => s.push_str(t),
4424 Inline::Space | Inline::SoftBreak => s.push(' '),
4425 Inline::LineBreak => s.push(' '),
4426 Inline::Emph(children)
4427 | Inline::Strong(children)
4428 | Inline::Strikeout(children)
4429 | Inline::Superscript(children)
4430 | Inline::Subscript(children) => s.push_str(&inlines_to_plaintext(children)),
4431 Inline::Code(_, c) => s.push_str(c),
4432 Inline::Link(_, alt, _, _) | Inline::Image(_, alt, _, _) => {
4433 s.push_str(&inlines_to_plaintext(alt))
4434 }
4435 Inline::Math(_, c) => s.push_str(c),
4436 Inline::Span(_, children) => s.push_str(&inlines_to_plaintext(children)),
4437 Inline::RawInline(_, _) => {}
4438 Inline::Quoted(_, children) => s.push_str(&inlines_to_plaintext(children)),
4439 Inline::Note(_) => {}
4440 Inline::Cite(_, text) => s.push_str(&inlines_to_plaintext(text)),
4441 Inline::Unsupported(_) => {}
4442 }
4443 }
4444 s
4445}
4446
/// Turns heading text into an identifier slug.
///
/// Characters are lowercased; only alphanumerics, `_`, `-` and `.` are kept. A run of
/// whitespace becomes a single `-`, unless the slug is still empty or the last kept
/// character was already a `-`. Trailing `-` characters are removed at the end.
fn pandoc_slugify(text: &str) -> String {
    let mut slug = String::new();
    let mut last_was_dash = false;
    for ch in text.chars() {
        if ch.is_whitespace() {
            let needs_separator = !(slug.is_empty() || last_was_dash);
            if needs_separator {
                slug.push('-');
                last_was_dash = true;
            }
        } else {
            // Dropped characters leave `last_was_dash` untouched, so "a !b" still
            // collapses to "a-b".
            let kept = ch
                .to_lowercase()
                .filter(|&c| c.is_alphanumeric() || matches!(c, '_' | '-' | '.'));
            for c in kept {
                slug.push(c);
                last_was_dash = c == '-';
            }
        }
    }
    let trimmed_len = slug.trim_end_matches('-').len();
    slug.truncate(trimmed_len);
    slug
}
4472
impl Attr {
    /// Creates an `Attr` that carries only the identifier `id`; its class list and
    /// key/value list are empty.
    fn with_id(id: String) -> Self {
        Self {
            id,
            classes: Vec::new(),
            kvs: Vec::new(),
        }
    }
}
4482
/// Appends the textual form of block `b` to `out`.
///
/// Each variant is written as its constructor name followed by its fields, with lists
/// rendered as `[ a , b ]`-style sequences (every element is preceded by a space and
/// separated by `,`; an empty list is `[ ]`). Strings go through
/// `write_haskell_string`, attributes through `write_attr`, tables through
/// `write_table`. Nested blocks and inlines recurse through `write_block_list` and
/// `write_inline_list`.
fn write_block(b: &Block, out: &mut String) {
    match b {
        Block::Para(inlines) => {
            out.push_str("Para [");
            write_inline_list(inlines, out);
            out.push_str(" ]");
        }
        Block::Plain(inlines) => {
            out.push_str("Plain [");
            write_inline_list(inlines, out);
            out.push_str(" ]");
        }
        Block::Header(level, attr, inlines) => {
            out.push_str(&format!("Header {level} ("));
            write_attr(attr, out);
            out.push_str(") [");
            write_inline_list(inlines, out);
            out.push_str(" ]");
        }
        Block::BlockQuote(blocks) => {
            out.push_str("BlockQuote [");
            write_block_list(blocks, out);
            out.push_str(" ]");
        }
        Block::CodeBlock(attr, content) => {
            out.push_str("CodeBlock (");
            write_attr(attr, out);
            out.push_str(") ");
            write_haskell_string(content, out);
        }
        Block::HorizontalRule => out.push_str("HorizontalRule"),
        Block::BulletList(items) => {
            // A list of items, each item itself a bracketed list of blocks.
            out.push_str("BulletList [");
            for (i, item) in items.iter().enumerate() {
                if i > 0 {
                    out.push(',');
                }
                out.push_str(" [");
                write_block_list(item, out);
                out.push_str(" ]");
            }
            out.push_str(" ]");
        }
        Block::OrderedList(start, style, delim, items) => {
            // List attributes are written as the triple ( start , style , delim ).
            out.push_str(&format!("OrderedList ( {start} , {style} , {delim} ) ["));
            for (i, item) in items.iter().enumerate() {
                if i > 0 {
                    out.push(',');
                }
                out.push_str(" [");
                write_block_list(item, out);
                out.push_str(" ]");
            }
            out.push_str(" ]");
        }
        Block::RawBlock(format, content) => {
            out.push_str("RawBlock ( Format ");
            write_haskell_string(format, out);
            out.push_str(" ) ");
            write_haskell_string(content, out);
        }
        Block::Table(data) => {
            write_table(data, out);
        }
        Block::Div(attr, blocks) => {
            out.push_str("Div (");
            write_attr(attr, out);
            out.push_str(") [");
            write_block_list(blocks, out);
            out.push_str(" ]");
        }
        Block::LineBlock(lines) => {
            // A list of lines, each line a bracketed list of inlines.
            out.push_str("LineBlock [");
            for (i, line) in lines.iter().enumerate() {
                if i > 0 {
                    out.push(',');
                }
                out.push_str(" [");
                write_inline_list(line, out);
                out.push_str(" ]");
            }
            out.push_str(" ]");
        }
        Block::DefinitionList(items) => {
            // Each entry is the pair ( [term inlines] , [ [definition blocks] , ... ] ).
            out.push_str("DefinitionList [");
            for (i, (term, defs)) in items.iter().enumerate() {
                if i > 0 {
                    out.push(',');
                }
                out.push_str(" ( [");
                write_inline_list(term, out);
                out.push_str(" ] , [");
                for (j, def) in defs.iter().enumerate() {
                    if j > 0 {
                        out.push(',');
                    }
                    out.push_str(" [");
                    write_block_list(def, out);
                    out.push_str(" ]");
                }
                out.push_str(" ] )");
            }
            out.push_str(" ]");
        }
        Block::Figure(attr, caption, body) => {
            // The caption's short form is always written as `Nothing`.
            out.push_str("Figure (");
            write_attr(attr, out);
            out.push_str(") ( Caption Nothing [");
            write_block_list(caption, out);
            out.push_str(" ] ) [");
            write_block_list(body, out);
            out.push_str(" ]");
        }
        Block::Unsupported(name) => {
            // Placeholder output; `name` is written with Rust's Debug quoting.
            out.push_str(&format!("Unsupported {name:?}"));
        }
    }
}
4603
4604fn write_table(data: &TableData, out: &mut String) {
4605 out.push_str("Table (");
4606 write_attr(&data.attr, out);
4607 out.push_str(") ( Caption Nothing [");
4608 if !data.caption.is_empty() {
4609 out.push_str(" Plain [");
4610 write_inline_list(&data.caption, out);
4611 out.push_str(" ]");
4612 }
4613 out.push_str(" ] ) [");
4614 for (i, align) in data.aligns.iter().enumerate() {
4615 if i > 0 {
4616 out.push(',');
4617 }
4618 let width = data.widths.get(i).copied().unwrap_or(None);
4619 match width {
4620 None => out.push_str(&format!(" ( {align} , ColWidthDefault )")),
4621 Some(w) => out.push_str(&format!(" ( {align} , ColWidth {} )", show_double(w))),
4622 }
4623 }
4624 out.push_str(" ] ( TableHead ( \"\" , [ ] , [ ] ) [");
4625 for (i, row) in data.head_rows.iter().enumerate() {
4626 if i > 0 {
4627 out.push(',');
4628 }
4629 out.push(' ');
4630 write_table_row(row, out);
4631 }
4632 out.push_str(" ] ) [ TableBody ( \"\" , [ ] , [ ] ) ( RowHeadColumns 0 ) [ ] [");
4633 for (i, row) in data.body_rows.iter().enumerate() {
4634 if i > 0 {
4635 out.push(',');
4636 }
4637 out.push(' ');
4638 write_table_row(row, out);
4639 }
4640 out.push_str(" ] ] ( TableFoot ( \"\" , [ ] , [ ] ) [");
4641 for (i, row) in data.foot_rows.iter().enumerate() {
4642 if i > 0 {
4643 out.push(',');
4644 }
4645 out.push(' ');
4646 write_table_row(row, out);
4647 }
4648 out.push_str(" ] )");
4649}
4650
4651fn write_table_row(cells: &[GridCell], out: &mut String) {
4652 out.push_str("Row ( \"\" , [ ] , [ ] ) [");
4653 for (i, cell) in cells.iter().enumerate() {
4654 if i > 0 {
4655 out.push(',');
4656 }
4657 out.push_str(&format!(
4658 " Cell ( \"\" , [ ] , [ ] ) AlignDefault ( RowSpan {} ) ( ColSpan {} ) [",
4659 cell.row_span, cell.col_span
4660 ));
4661 if !cell.blocks.is_empty() {
4662 write_block_list(&cell.blocks, out);
4663 }
4664 out.push_str(" ]");
4665 }
4666 out.push_str(" ]");
4667}
4668
4669fn write_block_list(blocks: &[Block], out: &mut String) {
4670 for (i, b) in blocks.iter().enumerate() {
4671 if i > 0 {
4672 out.push(',');
4673 }
4674 out.push(' ');
4675 write_block(b, out);
4676 }
4677}
4678
4679fn write_inline_list(inlines: &[Inline], out: &mut String) {
4680 for (i, inline) in inlines.iter().enumerate() {
4681 if i > 0 {
4682 out.push(',');
4683 }
4684 out.push(' ');
4685 write_inline(inline, out);
4686 }
4687}
4688
/// Appends the textual form of `inline` to `out`.
///
/// Each variant is written as its constructor name followed by its fields. Child
/// inlines recurse through `write_inline_list`, note bodies through
/// `write_block_list`, attributes through `write_attr`, and every string payload
/// through `write_haskell_string`.
fn write_inline(inline: &Inline, out: &mut String) {
    match inline {
        Inline::Str(s) => {
            out.push_str("Str ");
            write_haskell_string(s, out);
        }
        Inline::Space => out.push_str("Space"),
        Inline::SoftBreak => out.push_str("SoftBreak"),
        Inline::LineBreak => out.push_str("LineBreak"),
        Inline::Emph(children) => {
            out.push_str("Emph [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Strong(children) => {
            out.push_str("Strong [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Strikeout(children) => {
            out.push_str("Strikeout [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Superscript(children) => {
            out.push_str("Superscript [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Subscript(children) => {
            out.push_str("Subscript [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Code(attr, content) => {
            out.push_str("Code (");
            write_attr(attr, out);
            out.push_str(") ");
            write_haskell_string(content, out);
        }
        Inline::Link(attr, text, url, title) => {
            // The target is written as the pair ( url , title ).
            out.push_str("Link (");
            write_attr(attr, out);
            out.push_str(") [");
            write_inline_list(text, out);
            out.push_str(" ] ( ");
            write_haskell_string(url, out);
            out.push_str(" , ");
            write_haskell_string(title, out);
            out.push_str(" )");
        }
        Inline::Image(attr, alt, url, title) => {
            // Same layout as Link, with the alt text in place of the link text.
            out.push_str("Image (");
            write_attr(attr, out);
            out.push_str(") [");
            write_inline_list(alt, out);
            out.push_str(" ] ( ");
            write_haskell_string(url, out);
            out.push_str(" , ");
            write_haskell_string(title, out);
            out.push_str(" )");
        }
        Inline::Math(kind, content) => {
            // `kind` is written verbatim, without quoting.
            out.push_str("Math ");
            out.push_str(kind);
            out.push(' ');
            write_haskell_string(content, out);
        }
        Inline::Span(attr, children) => {
            out.push_str("Span (");
            write_attr(attr, out);
            out.push_str(") [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::RawInline(format, content) => {
            out.push_str("RawInline ( Format ");
            write_haskell_string(format, out);
            out.push_str(" ) ");
            write_haskell_string(content, out);
        }
        Inline::Quoted(kind, children) => {
            // `kind` is written verbatim, without quoting.
            out.push_str("Quoted ");
            out.push_str(kind);
            out.push_str(" [");
            write_inline_list(children, out);
            out.push_str(" ]");
        }
        Inline::Note(blocks) => {
            out.push_str("Note [");
            write_block_list(blocks, out);
            out.push_str(" ]");
        }
        Inline::Cite(citations, text) => {
            // Citations are written in record syntax, followed by the inline list
            // stored alongside them.
            out.push_str("Cite [");
            for (i, c) in citations.iter().enumerate() {
                if i > 0 {
                    out.push(',');
                }
                out.push_str(" Citation { citationId = ");
                write_haskell_string(&c.id, out);
                out.push_str(" , citationPrefix = [");
                write_inline_list(&c.prefix, out);
                out.push_str(" ] , citationSuffix = [");
                write_inline_list(&c.suffix, out);
                out.push_str(" ] , citationMode = ");
                out.push_str(match c.mode {
                    CitationMode::AuthorInText => "AuthorInText",
                    CitationMode::NormalCitation => "NormalCitation",
                    CitationMode::SuppressAuthor => "SuppressAuthor",
                });
                out.push_str(&format!(
                    " , citationNoteNum = {} , citationHash = {} }}",
                    c.note_num, c.hash
                ));
            }
            out.push_str(" ] [");
            write_inline_list(text, out);
            out.push_str(" ]");
        }
        Inline::Unsupported(name) => {
            // Placeholder output; `name` is written with Rust's Debug quoting.
            out.push_str(&format!("Unsupported {name:?}"));
        }
    }
}
4814
4815fn write_attr(attr: &Attr, out: &mut String) {
4816 out.push(' ');
4817 write_haskell_string(&attr.id, out);
4818 out.push_str(" , [");
4819 for (i, c) in attr.classes.iter().enumerate() {
4820 if i > 0 {
4821 out.push(',');
4822 }
4823 out.push(' ');
4824 write_haskell_string(c, out);
4825 }
4826 if !attr.classes.is_empty() {
4827 out.push(' ');
4828 }
4829 out.push_str("] , [");
4830 for (i, (k, v)) in attr.kvs.iter().enumerate() {
4831 if i > 0 {
4832 out.push(',');
4833 }
4834 out.push_str(" ( ");
4835 write_haskell_string(k, out);
4836 out.push_str(" , ");
4837 write_haskell_string(v, out);
4838 out.push_str(" )");
4839 }
4840 if !attr.kvs.is_empty() {
4841 out.push(' ');
4842 }
4843 out.push_str("] ");
4844}
4845
/// Appends `s` to `out` as a double-quoted Haskell string literal, following the
/// escaping rules of Haskell's `show` for strings (`showLitChar`):
///
/// * `"` and `\` are backslash-escaped;
/// * printable ASCII (`' '..='~'`) is copied as-is;
/// * ASCII control characters use their mnemonic escapes (`\n`, `\t`, `\a`, `\NUL`,
///   `\ESC`, ...) and DEL is `\DEL`;
/// * everything above DEL is written as a decimal escape such as `\8212`;
/// * the empty escape `\&` is inserted where the next character would otherwise be read
///   as part of the escape: an ASCII digit after a decimal escape, or `H` after `\SO`
///   (which would read as `\SOH`).
fn write_haskell_string(s: &str, out: &mut String) {
    /// Escape names for code points 0..=31, indexed by code point.
    const CONTROL_NAMES: [&str; 32] = [
        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "a", "b", "t", "n", "v", "f", "r", "SO",
        "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC",
        "FS", "GS", "RS", "US",
    ];

    out.push('"');
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\u{7f}' => out.push_str("\\DEL"),
            ' '..='~' => out.push(ch),
            '\u{0}'..='\u{1f}' => {
                out.push('\\');
                out.push_str(CONTROL_NAMES[ch as usize]);
                // `\SO` followed by `H` would be read back as `\SOH`.
                if ch == '\u{e}' && chars.peek() == Some(&'H') {
                    out.push_str("\\&");
                }
            }
            _ => {
                out.push('\\');
                out.push_str(&(ch as u32).to_string());
                // Keep a following digit from being absorbed into the number.
                if chars.peek().is_some_and(|next| next.is_ascii_digit()) {
                    out.push_str("\\&");
                }
            }
        }
    }
    out.push('"');
}