use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use crate::SyntaxNode;
use crate::syntax::SyntaxKind;
use rowan::NodeOrToken;
/// Document-wide lookup tables gathered before rendering.
///
/// Built by `build_refs_ctx` and read through the `REFS_CTX` thread-local.
#[derive(Default)]
struct RefsCtx {
/// Reference definitions keyed by normalized label; the value is `(url, title)`.
refs: HashMap<String, (String, String)>,
/// Every heading identifier assigned while scanning the document.
heading_ids: HashSet<String>,
/// Assigned heading identifier, keyed by the heading node's start offset.
heading_id_by_offset: HashMap<u32, String>,
/// Parsed footnote bodies keyed by footnote label (first definition wins).
footnotes: HashMap<String, Vec<Block>>,
/// Example-list item number keyed by its `@label` (first occurrence wins).
example_label_to_num: HashMap<String, usize>,
/// Number given to the first item of an example list, keyed by the list's start offset.
example_list_start_by_offset: HashMap<u32, usize>,
/// Note number recorded for each citation, keyed by the citation's start offset.
cite_note_num_by_offset: HashMap<u32, i64>,
}
thread_local! {
// Per-thread context of the conversion in progress: `to_pandoc_ast` installs it
// before rendering and resets it to the default value afterwards.
static REFS_CTX: RefCell<RefsCtx> = RefCell::new(RefsCtx::default());
}
/// Renders `tree` as a bracketed, comma-separated list of blocks.
///
/// The reference context is installed in `REFS_CTX` for the duration of the
/// rendering and reset to its default value before returning.
pub fn to_pandoc_ast(tree: &SyntaxNode) -> String {
    let refs = build_refs_ctx(tree);
    REFS_CTX.with(|cell| *cell.borrow_mut() = refs);

    let mut rendered = String::from("[");
    let mut first = true;
    for block in &blocks_from_doc(tree) {
        if !first {
            rendered.push(',');
        }
        first = false;
        rendered.push(' ');
        write_block(block, &mut rendered);
    }
    rendered.push_str(" ]");

    REFS_CTX.with(|cell| *cell.borrow_mut() = RefsCtx::default());
    rendered
}
/// Scans `tree` and builds the lookup tables used during rendering.
///
/// Citation note numbers and example numbering are collected first and copied
/// into `REFS_CTX` before references, footnotes and headings are collected
/// (presumably so that the last pass can already read them — confirm).
fn build_refs_ctx(tree: &SyntaxNode) -> RefsCtx {
    let mut ctx = RefsCtx::default();
    collect_cite_note_nums(tree, &mut ctx);

    let mut next_example = 0usize;
    collect_example_numbering(tree, &mut ctx, &mut next_example);

    REFS_CTX.with(|cell| {
        let mut shared = cell.borrow_mut();
        shared
            .cite_note_num_by_offset
            .clone_from(&ctx.cite_note_num_by_offset);
        shared
            .example_label_to_num
            .clone_from(&ctx.example_label_to_num);
        shared
            .example_list_start_by_offset
            .clone_from(&ctx.example_list_start_by_offset);
    });

    let mut seen = HashMap::<String, u32>::new();
    collect_refs_and_headings(tree, &mut ctx, &mut seen);
    ctx
}
/// Assigns a note number to every citation reachable from the document body.
///
/// Footnote definitions are indexed by label first (first definition wins);
/// top-level footnote definitions are then skipped and only visited through
/// the references that point at them.
fn collect_cite_note_nums(tree: &SyntaxNode, ctx: &mut RefsCtx) {
    let mut defs_by_label: HashMap<String, SyntaxNode> = HashMap::new();
    let definitions = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::FOOTNOTE_DEFINITION);
    for def in definitions {
        if let Some(label) = footnote_label(&def) {
            defs_by_label.entry(label).or_insert(def);
        }
    }

    let mut note_counter = 0i64;
    let body = tree
        .children()
        .filter(|n| n.kind() != SyntaxKind::FOOTNOTE_DEFINITION);
    for top in body {
        visit_for_cite_nums(&top, &defs_by_label, &mut note_counter, None, ctx);
    }
}
/// Walks the child nodes of `node`, recording a note number per citation.
///
/// * A citation outside a footnote takes the next counter value; inside a
///   footnote (`in_fn` is `Some`) it reuses the footnote's number.
/// * A footnote reference outside a footnote consumes a counter value and the
///   matching definition, if any, is visited with that number. References
///   encountered while already inside a footnote are ignored.
/// * Any other node is descended into.
fn visit_for_cite_nums(
    node: &SyntaxNode,
    fn_defs: &HashMap<String, SyntaxNode>,
    counter: &mut i64,
    in_fn: Option<i64>,
    ctx: &mut RefsCtx,
) {
    for child in node.children() {
        match child.kind() {
            SyntaxKind::CITATION => {
                let note = in_fn.unwrap_or_else(|| {
                    *counter += 1;
                    *counter
                });
                let start: u32 = child.text_range().start().into();
                ctx.cite_note_num_by_offset.insert(start, note);
            }
            SyntaxKind::FOOTNOTE_REFERENCE => {
                if in_fn.is_some() {
                    continue;
                }
                *counter += 1;
                let this_note = *counter;
                let definition = footnote_label(&child).and_then(|label| fn_defs.get(&label));
                if let Some(def) = definition {
                    visit_for_cite_nums(def, fn_defs, counter, Some(this_note), ctx);
                }
            }
            _ => visit_for_cite_nums(&child, fn_defs, counter, in_fn, ctx),
        }
    }
}
/// Numbers example-list items in document order.
///
/// For every example list the number of its first item is stored by the list's
/// start offset, and each labelled item stores its number under the label
/// (first occurrence wins). All children are descended into, example lists
/// included.
fn collect_example_numbering(node: &SyntaxNode, ctx: &mut RefsCtx, counter: &mut usize) {
    for child in node.children() {
        if child.kind() == SyntaxKind::LIST && list_is_example(&child) {
            let start: u32 = child.text_range().start().into();
            ctx.example_list_start_by_offset.insert(start, *counter + 1);

            let items = child
                .children()
                .filter(|c| c.kind() == SyntaxKind::LIST_ITEM);
            for item in items {
                *counter += 1;
                if let Some(label) = example_item_label(&item) {
                    ctx.example_label_to_num.entry(label).or_insert(*counter);
                }
            }
        }
        collect_example_numbering(&child, ctx, counter);
    }
}
/// Reports whether `list` is an example list, judged by its first item.
///
/// The first item's marker, with an enclosing `( )`, trailing `)` or trailing
/// `.` removed, must be `@` followed only by ASCII alphanumerics, `_` or `-`
/// (possibly nothing).
fn list_is_example(list: &SyntaxNode) -> bool {
    let first_item = list
        .children()
        .find(|c| c.kind() == SyntaxKind::LIST_ITEM);
    let Some(item) = first_item else {
        return false;
    };

    let marker = list_item_marker_text(&item);
    let marker = marker.trim();
    let body = marker
        .strip_prefix('(')
        .and_then(|s| s.strip_suffix(')'))
        .or_else(|| marker.strip_suffix(')'))
        .or_else(|| marker.strip_suffix('.'))
        .unwrap_or(marker);

    match body.strip_prefix('@') {
        Some(label) => label
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-')),
        None => false,
    }
}
fn list_item_marker_text(item: &SyntaxNode) -> String {
item.children_with_tokens()
.filter_map(|el| el.into_token())
.find(|t| t.kind() == SyntaxKind::LIST_MARKER)
.map(|t| t.text().to_string())
.unwrap_or_default()
}
/// Extracts the label of an example-list item (`(@label)`, `@label)` or
/// `@label.`), returning `None` when the marker has no `@` or the label is empty.
fn example_item_label(item: &SyntaxNode) -> Option<String> {
    let marker = list_item_marker_text(item);
    let marker = marker.trim();

    let body = if let Some(inner) = marker.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
        inner
    } else if let Some(inner) = marker.strip_suffix(')') {
        inner
    } else if let Some(inner) = marker.strip_suffix('.') {
        inner
    } else {
        marker
    };

    body.strip_prefix('@')
        .filter(|label| !label.is_empty())
        .map(str::to_owned)
}
/// Collects reference definitions, footnote definitions and heading ids.
///
/// Reference and footnote definitions are recorded (first definition wins) and
/// not descended into. Headings get an identifier: an explicit one is used as
/// is, an implicit one falls back to `section` when empty and receives a
/// `-N` suffix when the same base was already handed out. `seen_ids` counts
/// how often each implicit base has been used.
fn collect_refs_and_headings(
    node: &SyntaxNode,
    ctx: &mut RefsCtx,
    seen_ids: &mut HashMap<String, u32>,
) {
    for child in node.children() {
        let kind = child.kind();

        if kind == SyntaxKind::REFERENCE_DEFINITION {
            if let Some((label, url, title)) = parse_reference_def(&child) {
                let key = normalize_ref_label(&label);
                ctx.refs.entry(key).or_insert((url, title));
            }
            continue;
        }

        if kind == SyntaxKind::FOOTNOTE_DEFINITION {
            if let Some((label, blocks)) = parse_footnote_def(&child) {
                ctx.footnotes.entry(label).or_insert(blocks);
            }
            continue;
        }

        if kind == SyntaxKind::HEADING {
            let (raw_id, explicit) = heading_id_with_explicitness(&child);
            let assigned = if explicit {
                // Registered so the id is known, but its use count stays untouched.
                seen_ids.entry(raw_id.clone()).or_insert(0);
                raw_id
            } else {
                let base = if raw_id.is_empty() {
                    "section".to_string()
                } else {
                    raw_id
                };
                let uses = seen_ids.entry(base.clone()).or_insert(0);
                let unique = match *uses {
                    0 => base,
                    n => format!("{base}-{n}"),
                };
                *uses += 1;
                unique
            };
            if !assigned.is_empty() {
                let start: u32 = child.text_range().start().into();
                ctx.heading_ids.insert(assigned.clone());
                ctx.heading_id_by_offset.insert(start, assigned);
            }
        }

        collect_refs_and_headings(&child, ctx, seen_ids);
    }
}
/// Returns the heading's identifier and whether it was written explicitly.
///
/// A non-empty id inside a `{...}` attribute yields `(id, true)`; otherwise
/// the id is slugified from the heading's plain text and the flag is `false`.
fn heading_id_with_explicitness(node: &SyntaxNode) -> (String, bool) {
    let content = node
        .children()
        .find(|c| c.kind() == SyntaxKind::HEADING_CONTENT);
    let inlines = match content {
        Some(c) => coalesce_inlines(inlines_from(&c)),
        None => Default::default(),
    };

    let mut raw_attr: Option<String> = None;
    for el in node.children_with_tokens() {
        match el {
            NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => {
                raw_attr = Some(n.text().to_string());
                break;
            }
            NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => {
                raw_attr = Some(t.text().to_string());
                break;
            }
            _ => {}
        }
    }

    let explicit_id = raw_attr
        .as_deref()
        .map(str::trim)
        .and_then(|raw| raw.strip_prefix('{'))
        .and_then(|raw| raw.strip_suffix('}'))
        .map(|inner| parse_attr_block(inner).id)
        .filter(|id| !id.is_empty());

    match explicit_id {
        Some(id) => (id, true),
        None => (pandoc_slugify(&inlines_to_plaintext(&inlines)), false),
    }
}
/// Parses a footnote definition into its label and body blocks.
///
/// Code blocks without a fence get four extra leading spaces stripped from
/// every line; all other children go through `collect_block`. Returns `None`
/// when the node carries no label.
fn parse_footnote_def(node: &SyntaxNode) -> Option<(String, Vec<Block>)> {
    let label = footnote_label(node)?;
    let mut body = Vec::new();
    for child in node.children() {
        let has_fence = || {
            child
                .children()
                .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
        };
        if child.kind() == SyntaxKind::CODE_BLOCK && !has_fence() {
            body.push(indented_code_block_with_extra_strip(&child, 4));
        } else {
            collect_block(&child, &mut body);
        }
    }
    Some((label, body))
}
/// Builds a code (or raw) block whose lines carry `extra` columns of
/// container indentation.
///
/// Trailing newlines are dropped, tabs are expanded, up to `extra` leading
/// spaces are removed per line, and for blocks without a fence the code
/// indentation is stripped as well.
fn indented_code_block_with_extra_strip(node: &SyntaxNode, extra: usize) -> Block {
    let raw_format = code_block_raw_format(node);
    let attr = code_block_attr(node);
    let fenced = node
        .children()
        .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);

    let mut text = String::new();
    for part in node
        .children()
        .filter(|c| c.kind() == SyntaxKind::CODE_CONTENT)
    {
        text.push_str(&part.text().to_string());
    }
    text.truncate(text.trim_end_matches('\n').len());

    let expanded: Vec<String> = text.split('\n').map(expand_tabs_to_4).collect();
    let mut text = strip_leading_spaces_per_line(&expanded.join("\n"), extra);
    if !fenced {
        text = strip_indented_code_indent(&text);
    }

    match raw_format {
        Some(fmt) => Block::RawBlock(fmt, text),
        None => Block::CodeBlock(attr, text),
    }
}
/// Removes up to `n` leading space characters from every `\n`-separated line
/// of `s`. Only spaces are removed; stripping stops at the first other character.
fn strip_leading_spaces_per_line(s: &str, n: usize) -> String {
    s.split('\n')
        .map(|line| {
            // Spaces are one byte each, so the count doubles as a byte offset.
            let strip = line.bytes().take(n).take_while(|&b| b == b' ').count();
            &line[strip..]
        })
        .collect::<Vec<_>>()
        .join("\n")
}
/// Returns the text of the first `FOOTNOTE_LABEL_ID` token directly under
/// `node`, if any.
fn footnote_label(node: &SyntaxNode) -> Option<String> {
    node.children_with_tokens()
        .filter_map(|el| el.into_token())
        .find(|tok| tok.kind() == SyntaxKind::FOOTNOTE_LABEL_ID)
        .map(|tok| tok.text().to_string())
}
/// Splits a reference definition into `(label, url, title)`.
///
/// The label is the text of the `LINK_TEXT` inside the first `LINK` child.
/// Everything after that link must start with `:` (leading whitespace
/// allowed); the destination and title are parsed from the remainder.
/// Returns `None` when the link, its text or the colon is missing.
fn parse_reference_def(node: &SyntaxNode) -> Option<(String, String, String)> {
    let link = node.children().find(|c| c.kind() == SyntaxKind::LINK)?;
    let label = link
        .children()
        .find(|c| c.kind() == SyntaxKind::LINK_TEXT)?
        .text()
        .to_string();

    // Concatenate the text of every element that follows the first LINK node.
    let following = node
        .children_with_tokens()
        .skip_while(|el| !matches!(el, NodeOrToken::Node(n) if n.kind() == SyntaxKind::LINK))
        .skip(1);
    let mut tail = String::new();
    for el in following {
        match el {
            NodeOrToken::Node(n) => tail.push_str(&n.text().to_string()),
            NodeOrToken::Token(t) => tail.push_str(t.text()),
        }
    }

    let after_colon = tail.trim_start().strip_prefix(':')?.trim_start();
    let (url, remainder) = parse_ref_url(after_colon);
    let title = parse_dest_title(remainder.trim());
    Some((unescape_label(&label), url, title))
}
/// Splits the destination off the front of `s`.
///
/// After skipping leading whitespace, a `<...>` destination is returned
/// without the angle brackets; otherwise the destination runs up to the first
/// whitespace character. The second element is the unconsumed rest of `s`.
fn parse_ref_url(s: &str) -> (String, &str) {
    let s = s.trim_start();

    let bracketed = s.strip_prefix('<').and_then(|rest| rest.split_once('>'));
    if let Some((url, after)) = bracketed {
        return (url.to_string(), after);
    }

    let cut = s.find(char::is_whitespace).unwrap_or(s.len());
    let (url, after) = s.split_at(cut);
    (url.to_string(), after)
}
fn unescape_label(label: &str) -> String {
let mut out = String::with_capacity(label.len());
let mut chars = label.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '\\'
&& let Some(&next) = chars.peek()
&& is_ascii_punct(next)
{
out.push(next);
chars.next();
} else {
out.push(ch);
}
}
out
}
/// Reports whether `c` is an ASCII punctuation character.
fn is_ascii_punct(c: char) -> bool {
    // `is_ascii_punctuation` is already false for every non-ASCII character.
    c.is_ascii_punctuation()
}
/// Produces the lookup key for a reference label: backslash escapes removed,
/// whitespace runs collapsed to single spaces, leading/trailing whitespace
/// dropped, and every character lower-cased individually.
fn normalize_ref_label(label: &str) -> String {
    let unescaped = unescape_label(label);
    let mut out = String::new();
    for (idx, word) in unescaped.split_whitespace().enumerate() {
        if idx > 0 {
            out.push(' ');
        }
        // Per-character lowering on purpose: `str::to_lowercase` treats a
        // word-final sigma specially, which would change the key.
        out.extend(word.chars().flat_map(char::to_lowercase));
    }
    out
}
/// Looks up the `(url, title)` registered for `label` in the current
/// thread's reference context, using the normalized label as key.
fn lookup_ref(label: &str) -> Option<(String, String)> {
    let key = normalize_ref_label(label);
    REFS_CTX.with(|cell| {
        let ctx = cell.borrow();
        ctx.refs.get(&key).cloned()
    })
}
/// Returns the slug of `label` when a heading with that identifier exists in
/// the current thread's reference context; `None` for an empty slug or an
/// unknown identifier.
fn lookup_heading_id(label: &str) -> Option<String> {
    let id = pandoc_slugify(&unescape_label(label));
    if id.is_empty() {
        return None;
    }
    let known = REFS_CTX.with(|cell| cell.borrow().heading_ids.contains(&id));
    known.then_some(id)
}
/// Re-tokenizes `s` and joins the tokens with single spaces.
///
/// Tokens are: each of `[ ] ( ) ,` on its own, a double-quoted string
/// (backslash escapes are skipped over, an unterminated string runs to the end
/// of input), and any other run of characters up to whitespace, a bracket, a
/// comma or a quote. Whitespace between tokens is discarded.
pub fn normalize_native(s: &str) -> String {
    const fn is_space(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }
    const fn is_punct(b: u8) -> bool {
        matches!(b, b'[' | b']' | b'(' | b')' | b',')
    }

    let bytes = s.as_bytes();
    let mut tokens: Vec<&str> = Vec::new();
    let mut pos = 0usize;

    while let Some(&lead) = bytes.get(pos) {
        if is_space(lead) {
            pos += 1;
            continue;
        }

        let start = pos;
        if is_punct(lead) {
            pos += 1;
        } else if lead == b'"' {
            pos += 1;
            while let Some(&b) = bytes.get(pos) {
                if b == b'\\' && pos + 1 < bytes.len() {
                    // Skip the escaped byte together with the backslash.
                    pos += 2;
                } else {
                    pos += 1;
                    if b == b'"' {
                        break;
                    }
                }
            }
        } else {
            while bytes
                .get(pos)
                .is_some_and(|&b| !is_space(b) && !is_punct(b) && b != b'"')
            {
                pos += 1;
            }
        }
        tokens.push(&s[start..pos]);
    }

    tokens.join(" ")
}
/// Block-level element of the converted document.
///
/// Values are produced by `block_from` and the helpers it dispatches to. The
/// variant names appear to mirror the Pandoc AST constructors — confirm
/// against the writer before relying on that.
#[derive(Debug)]
// Several variants deliberately end in the enum's own name (`CodeBlock`, `RawBlock`, ...).
#[allow(clippy::enum_variant_names)]
enum Block {
/// Paragraph of inlines.
Para(Vec<Inline>),
/// Inlines that are not wrapped in a paragraph.
Plain(Vec<Inline>),
/// Heading: level, attributes, content.
Header(usize, Attr, Vec<Inline>),
/// Block quote with its nested blocks.
BlockQuote(Vec<Block>),
/// Code block: attributes and text.
CodeBlock(Attr, String),
/// Horizontal rule.
HorizontalRule,
/// Bullet list; one `Vec<Block>` per item.
BulletList(Vec<Vec<Block>>),
/// Ordered list; presumably start number, number style and delimiter,
/// followed by the items — TODO confirm against the list builder.
OrderedList(usize, &'static str, &'static str, Vec<Vec<Block>>),
/// Raw block: format name (e.g. `html`, `tex`) and text.
RawBlock(String, String),
/// Table.
Table(TableData),
/// Div: attributes and nested blocks.
Div(Attr, Vec<Block>),
/// Line block; one inline sequence per line.
LineBlock(Vec<Vec<Inline>>),
/// Definition list; each entry is a term and its definitions.
DefinitionList(Vec<(Vec<Inline>, Vec<Vec<Block>>)>),
/// Figure: attributes, caption blocks, body blocks.
Figure(Attr, Vec<Block>, Vec<Block>),
/// Node kind without a conversion; carries the kind's `Debug` name.
Unsupported(String),
}
/// Contents of a table block.
#[derive(Debug)]
struct TableData {
/// Attributes attached to the table.
attr: Attr,
/// Caption inlines (empty when the table has no caption).
caption: Vec<Inline>,
/// Per-column alignment name, e.g. `AlignDefault`.
aligns: Vec<&'static str>,
/// Per-column width; `None` when no width is specified.
widths: Vec<Option<f64>>,
/// Header rows.
head_rows: Vec<Vec<GridCell>>,
/// Body rows.
body_rows: Vec<Vec<GridCell>>,
/// Footer rows.
foot_rows: Vec<Vec<GridCell>>,
}
/// One table cell together with the number of rows and columns it spans.
#[derive(Debug)]
struct GridCell {
/// Number of rows the cell covers.
row_span: u32,
/// Number of columns the cell covers.
col_span: u32,
/// Block content of the cell.
blocks: Vec<Block>,
}
impl GridCell {
fn no_span(blocks: Vec<Block>) -> Self {
Self {
row_span: 1,
col_span: 1,
blocks,
}
}
}
/// Inline-level element of the converted document.
#[derive(Debug)]
// Variant names such as `RawInline` deliberately repeat the enum name.
#[allow(clippy::enum_variant_names)]
enum Inline {
/// Run of text.
Str(String),
/// Inter-word space.
Space,
/// Soft line break.
SoftBreak,
/// Hard line break.
LineBreak,
/// Emphasis.
Emph(Vec<Inline>),
/// Strong emphasis.
Strong(Vec<Inline>),
/// Struck-out text.
Strikeout(Vec<Inline>),
/// Superscript.
Superscript(Vec<Inline>),
/// Subscript.
Subscript(Vec<Inline>),
/// Inline code: attributes and text.
Code(Attr, String),
/// Link; presumably attributes, link text, URL and title like `Image` — confirm.
Link(Attr, Vec<Inline>, String, String),
/// Image: attributes, alt inlines, URL, title.
Image(Attr, Vec<Inline>, String, String),
/// Math; the first field presumably names the math type — TODO confirm.
Math(&'static str, String),
/// Span: attributes and content.
Span(Attr, Vec<Inline>),
/// Raw inline: format name (e.g. `tex`) and text.
RawInline(String, String),
/// Quoted text; the first field presumably names the quote type — TODO confirm.
Quoted(&'static str, Vec<Inline>),
/// Footnote carrying its block content.
Note(Vec<Block>),
/// Citation group together with inline content.
Cite(Vec<Citation>, Vec<Inline>),
/// Inline without a conversion; carries a description string.
Unsupported(String),
}
/// A single citation inside an `Inline::Cite`.
#[derive(Debug)]
struct Citation {
/// Citation key.
id: String,
/// Inlines placed before the citation.
prefix: Vec<Inline>,
/// Inlines placed after the citation.
suffix: Vec<Inline>,
/// How the citation is presented.
mode: CitationMode,
/// Note number; presumably taken from `RefsCtx::cite_note_num_by_offset` — confirm.
note_num: i64,
/// NOTE(review): meaning not visible in this part of the file.
hash: i64,
}
/// Presentation mode of a citation.
///
/// NOTE(review): the per-variant descriptions follow the variant names only;
/// the syntax that selects each mode is not visible here.
#[derive(Debug, Clone, Copy)]
enum CitationMode {
/// Author named in the running text.
AuthorInText,
/// Regular citation.
NormalCitation,
/// Citation with the author suppressed.
SuppressAuthor,
}
/// Attribute set attached to blocks and inlines.
#[derive(Debug, Default, Clone)]
struct Attr {
/// Identifier (`#id` in an attribute block); empty when absent.
id: String,
/// Class names (`.class` or a bare word in an attribute block).
classes: Vec<String>,
/// `key=value` pairs in source order.
kvs: Vec<(String, String)>,
}
/// Converts every direct child of the document node into output blocks.
fn blocks_from_doc(doc: &SyntaxNode) -> Vec<Block> {
    let mut blocks = Vec::new();
    doc.children()
        .for_each(|child| collect_block(&child, &mut blocks));
    blocks
}
fn block_from(node: &SyntaxNode) -> Option<Block> {
match node.kind() {
SyntaxKind::PARAGRAPH => Some(Block::Para(coalesce_inlines(inlines_from(node)))),
SyntaxKind::PLAIN => Some(Block::Plain(coalesce_inlines(inlines_from(node)))),
SyntaxKind::HEADING => Some(heading_block(node)),
SyntaxKind::BLOCK_QUOTE => Some(Block::BlockQuote(blockquote_blocks(node))),
SyntaxKind::CODE_BLOCK => Some(code_block(node)),
SyntaxKind::HORIZONTAL_RULE => Some(Block::HorizontalRule),
SyntaxKind::LIST => Some(list_block(node)),
SyntaxKind::BLANK_LINE => None,
SyntaxKind::REFERENCE_DEFINITION => None,
SyntaxKind::FOOTNOTE_DEFINITION => None,
SyntaxKind::YAML_METADATA => None,
SyntaxKind::PANDOC_TITLE_BLOCK => None,
SyntaxKind::HTML_BLOCK => Some(html_block(node)),
SyntaxKind::PIPE_TABLE => pipe_table(node).map(Block::Table),
SyntaxKind::SIMPLE_TABLE => simple_table(node).map(Block::Table),
SyntaxKind::GRID_TABLE => grid_table(node).map(Block::Table),
SyntaxKind::MULTILINE_TABLE => multiline_table(node).map(Block::Table),
SyntaxKind::TEX_BLOCK => Some(tex_block(node)),
SyntaxKind::FENCED_DIV => Some(fenced_div(node)),
SyntaxKind::LINE_BLOCK => Some(line_block(node)),
SyntaxKind::DEFINITION_LIST => Some(definition_list(node)),
SyntaxKind::FIGURE => Some(figure_block(node)),
other => Some(Block::Unsupported(format!("{other:?}"))),
}
}
fn figure_block(node: &SyntaxNode) -> Block {
let mut alt: Vec<Inline> = Vec::new();
let mut image_inline: Option<Inline> = None;
if let Some(image) = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_LINK) {
let alt_node = image.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
if let Some(an) = alt_node {
alt = coalesce_inlines(inlines_from(&an));
}
let mut tmp = Vec::new();
render_image_inline(&image, &mut tmp);
if let Some(first) = tmp.into_iter().next() {
image_inline = Some(first);
}
}
let (figure_attr, image_inline) = match image_inline {
Some(Inline::Image(mut attr, alt_inlines, url, title)) if !attr.id.is_empty() => {
let fig_attr = Attr::with_id(std::mem::take(&mut attr.id));
(fig_attr, Some(Inline::Image(attr, alt_inlines, url, title)))
}
other => (Attr::default(), other),
};
let caption = if alt.is_empty() {
Vec::new()
} else {
vec![Block::Plain(alt)]
};
let body = match image_inline {
Some(img) => vec![Block::Plain(vec![img])],
None => Vec::new(),
};
Block::Figure(figure_attr, caption, body)
}
fn heading_block(node: &SyntaxNode) -> Block {
let level = heading_level(node);
let inlines = node
.children()
.find(|c| c.kind() == SyntaxKind::HEADING_CONTENT)
.map(|c| coalesce_inlines(inlines_from(&c)))
.unwrap_or_default();
let offset: u32 = node.text_range().start().into();
let final_id = REFS_CTX
.with(|c| c.borrow().heading_id_by_offset.get(&offset).cloned())
.unwrap_or_default();
let attr = node
.children_with_tokens()
.find_map(|el| match el {
NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => {
Some(t.text().to_string())
}
_ => None,
})
.map(|raw| {
let trimmed = raw.trim();
if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
let mut attr = parse_attr_block(inner);
if attr.id.is_empty() {
attr.id = final_id.clone();
}
attr
} else {
Attr::with_id(final_id.clone())
}
})
.unwrap_or_else(|| Attr::with_id(final_id));
Block::Header(level, attr, inlines)
}
/// Determines the heading level.
///
/// ATX headings: the number of `#` in the first marker token found inside an
/// `ATX_HEADING_MARKER` child. Setext headings: 1 when the underline starts
/// with `=`, otherwise 2. Falls back to 1 when neither marker exists.
fn heading_level(node: &SyntaxNode) -> usize {
    let atx_level = node
        .children()
        .filter(|c| c.kind() == SyntaxKind::ATX_HEADING_MARKER)
        .flat_map(|marker| marker.children_with_tokens())
        .filter_map(|el| el.into_token())
        .find(|tok| tok.kind() == SyntaxKind::ATX_HEADING_MARKER)
        .map(|tok| tok.text().matches('#').count());
    if let Some(level) = atx_level {
        return level;
    }

    let underline = node
        .descendants_with_tokens()
        .filter_map(|el| el.into_token())
        .find(|tok| tok.kind() == SyntaxKind::SETEXT_HEADING_UNDERLINE);
    match underline {
        Some(tok) if tok.text().trim_start().starts_with('=') => 1,
        Some(_) => 2,
        None => 1,
    }
}
/// Converts the children of a block quote into blocks.
fn blockquote_blocks(node: &SyntaxNode) -> Vec<Block> {
    let mut blocks = Vec::new();
    node.children()
        .for_each(|child| collect_block(&child, &mut blocks));
    blocks
}
/// Converts a code block node into `CodeBlock`, or into `RawBlock` when the
/// info string names a raw format.
///
/// Trailing newlines are dropped. Fenced content has its tabs expanded;
/// unfenced content has its code indentation stripped instead.
fn code_block(node: &SyntaxNode) -> Block {
    let raw_format = code_block_raw_format(node);
    let attr = code_block_attr(node);
    let fenced = node
        .children()
        .any(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN);

    let joined: String = node
        .children()
        .filter(|c| c.kind() == SyntaxKind::CODE_CONTENT)
        .map(|c| c.text().to_string())
        .collect();
    let body = joined.trim_end_matches('\n');

    let content = if fenced {
        body.split('\n')
            .map(expand_tabs_to_4)
            .collect::<Vec<_>>()
            .join("\n")
    } else {
        strip_indented_code_indent(body)
    };

    match raw_format {
        Some(fmt) => Block::RawBlock(fmt, content),
        None => Block::CodeBlock(attr, content),
    }
}
/// Returns the raw format named by a `{=format}` info string, if any.
///
/// The format must be non-empty and contain no whitespace.
fn code_block_raw_format(node: &SyntaxNode) -> Option<String> {
    let open = node
        .children()
        .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)?;
    let info = open
        .children()
        .find(|c| c.kind() == SyntaxKind::CODE_INFO)?
        .text()
        .to_string();

    let format = info
        .trim()
        .strip_prefix('{')?
        .strip_suffix('}')?
        .trim()
        .strip_prefix('=')?
        .trim();

    let usable = !format.is_empty() && !format.contains(char::is_whitespace);
    usable.then(|| format.to_string())
}
/// Derives the attributes of a fenced code block from its info string.
///
/// * `{...}` — parsed as an attribute block.
/// * `lang {...}` — attribute block with the normalized language as first class.
/// * `lang` — a single normalized class.
/// * missing fence, missing info or empty info — default attributes.
fn code_block_attr(node: &SyntaxNode) -> Attr {
    let info_node = node
        .children()
        .find(|c| c.kind() == SyntaxKind::CODE_FENCE_OPEN)
        .and_then(|open| open.children().find(|c| c.kind() == SyntaxKind::CODE_INFO));
    let Some(info_node) = info_node else {
        return Attr::default();
    };

    let raw = info_node.text().to_string();
    let info = raw.trim();

    if let Some(inner) = info.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
        return parse_attr_block(inner);
    }

    match info.find('{') {
        Some(brace) if info.ends_with('}') => {
            let (lang, braced) = info.split_at(brace);
            // `braced` starts with `{` and ends with `}`; drop both.
            let mut attr = parse_attr_block(&braced[1..braced.len() - 1]);
            let lang = lang.trim();
            if !lang.is_empty() {
                attr.classes.insert(0, normalize_lang_id(lang));
            }
            attr
        }
        _ if info.is_empty() => Attr::default(),
        _ => Attr {
            id: String::new(),
            classes: vec![normalize_lang_id(info)],
            kvs: Vec::new(),
        },
    }
}
/// Lower-cases a language name (ASCII only) and maps `c++` to `cpp` and
/// `objective-c` to `objectivec`.
fn normalize_lang_id(lang: &str) -> String {
    let lowered = lang.to_ascii_lowercase();
    let alias = match lowered.as_str() {
        "c++" => Some("cpp"),
        "objective-c" => Some("objectivec"),
        _ => None,
    };
    alias.map_or(lowered, str::to_owned)
}
/// Removes the four-column indentation of an indented code block from every
/// `\n`-separated line of `s`.
///
/// Tabs are expanded to four-column tab stops first, so a leading tab counts
/// as the full indent. Lines that start with fewer than four spaces are kept
/// unchanged (after tab expansion).
fn strip_indented_code_indent(s: &str) -> String {
    /// Width, in columns, of an indented code block's indentation.
    const INDENT_WIDTH: usize = 4;

    let mut out = String::with_capacity(s.len());
    for (i, line) in s.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        let expanded = expand_tabs_to_4(line);
        // No tab survives expansion, so only spaces need to be checked. The
        // width is spelled out numerically rather than as a string literal.
        let indented = expanded
            .as_bytes()
            .get(..INDENT_WIDTH)
            .is_some_and(|lead| lead.iter().all(|&b| b == b' '));
        if indented {
            out.push_str(&expanded[INDENT_WIDTH..]);
        } else {
            out.push_str(&expanded);
        }
    }
    out
}
/// Replaces every tab in `line` with spaces up to the next multiple-of-four
/// column. Columns are counted in characters from the start of `line`.
fn expand_tabs_to_4(line: &str) -> String {
    let mut out = String::with_capacity(line.len());
    let mut col = 0usize;
    for ch in line.chars() {
        if ch != '\t' {
            out.push(ch);
            col += 1;
            continue;
        }
        // A tab always advances by at least one column.
        let pad = 4 - col % 4;
        out.extend(std::iter::repeat(' ').take(pad));
        col += pad;
    }
    out
}
/// Converts an HTML block: a `<div>...</div>` block becomes a `Div`, anything
/// else a raw `html` block. Trailing newlines are dropped first.
fn html_block(node: &SyntaxNode) -> Block {
    let text = node.text().to_string();
    let content = text.trim_end_matches('\n');
    try_div_html_block(content)
        .unwrap_or_else(|| Block::RawBlock("html".to_string(), content.to_string()))
}
/// Appends the blocks produced by an HTML block node to `out`.
///
/// * A `<div>...</div>` block becomes a `Div`.
/// * Content starting (after spaces/tabs) with `<!`, `<?` or a raw-text
///   element tag, and any single-line content, is emitted verbatim as one raw
///   `html` block.
/// * Other multi-line content is split per line: blank lines are skipped,
///   lines consisting of one complete tag become raw `html` blocks and the
///   rest are parsed as inlines into `Plain` blocks.
fn emit_html_block(node: &SyntaxNode, out: &mut Vec<Block>) {
    let mut content = node.text().to_string();
    content.truncate(content.trim_end_matches('\n').len());

    if let Some(div) = try_div_html_block(&content) {
        out.push(div);
        return;
    }

    let body = content.trim_start_matches([' ', '\t']);
    // `<!` also covers comments (`<!--`) and CDATA sections (`<![CDATA[`).
    let keep_verbatim = body.starts_with("<!")
        || body.starts_with("<?")
        || is_raw_text_element_open(body)
        || !content.contains('\n');
    if keep_verbatim {
        out.push(Block::RawBlock("html".to_string(), content));
        return;
    }

    let lines = content
        .split('\n')
        .map(str::trim)
        .filter(|line| !line.is_empty());
    for line in lines {
        if is_complete_html_tag_line(line) {
            out.push(Block::RawBlock("html".to_string(), line.to_string()));
            continue;
        }
        let inlines = coalesce_inlines(parse_cell_text_inlines(line));
        if !inlines.is_empty() {
            out.push(Block::Plain(inlines));
        }
    }
}
/// Reports whether `s` starts with the opening tag of a raw-text element
/// (`script`, `style`, `pre` or `textarea`, matched case-insensitively).
///
/// The tag name must be followed by end of input, a space, tab, newline, `>`
/// or `/`, so `<scripts>` does not count.
fn is_raw_text_element_open(s: &str) -> bool {
    const RAW_TEXT_TAGS: [&str; 4] = ["script", "style", "pre", "textarea"];

    let Some((&b'<', rest)) = s.as_bytes().split_first() else {
        return false;
    };

    // Compare bytes instead of slicing the `&str`: a tag-length prefix can end
    // inside a multi-byte character (e.g. `<préface>`), and slicing a string
    // there panics.
    RAW_TEXT_TAGS.iter().any(|tag| {
        let name_matches = rest
            .get(..tag.len())
            .is_some_and(|name| name.eq_ignore_ascii_case(tag.as_bytes()));
        name_matches
            && matches!(
                rest.get(tag.len()).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'>' | b'/')
            )
    })
}
/// Reports whether `s` is exactly one tag: it starts with `<` and the first
/// `>` outside single or double quotes is the last byte of `s`.
///
/// An unterminated quoted value makes the line incomplete.
fn is_complete_html_tag_line(s: &str) -> bool {
    let Some((&b'<', rest)) = s.as_bytes().split_first() else {
        return false;
    };

    // `Some(q)` while scanning a value quoted with `q`.
    let mut open_quote: Option<u8> = None;
    for (idx, &b) in rest.iter().enumerate() {
        match open_quote {
            Some(q) => {
                if b == q {
                    open_quote = None;
                }
            }
            None => match b {
                b'>' => return idx + 1 == rest.len(),
                b'"' | b'\'' => open_quote = Some(b),
                _ => {}
            },
        }
    }
    false
}
/// Appends the block(s) for `node` to `out`.
///
/// HTML blocks may expand into several blocks and are handled by
/// `emit_html_block`; every other node contributes at most one block.
fn collect_block(node: &SyntaxNode, out: &mut Vec<Block>) {
    match node.kind() {
        SyntaxKind::HTML_BLOCK => emit_html_block(node, out),
        _ => out.extend(block_from(node)),
    }
}
/// Tries to interpret `content` as a complete `<div ...> ... </div>` block.
///
/// Returns `None` unless `content` (after leading spaces/tabs) starts with a
/// `<div` tag and, ignoring trailing spaces, tabs and newlines, ends with
/// `</div>` (both matched case-insensitively). The opening tag's attributes
/// are parsed as HTML attributes and the text between the tags is parsed as
/// Pandoc blocks.
///
/// When the content starts on the same line as the opening tag and parses to a
/// single paragraph, that paragraph is demoted to `Plain`.
fn try_div_html_block(content: &str) -> Option<Block> {
    let bytes = content.as_bytes();
    let leading_ws = bytes
        .iter()
        .position(|&b| b != b' ' && b != b'\t')
        .unwrap_or(bytes.len());
    let head = &content[leading_ws..];
    let head_bytes = head.as_bytes();
    if head_bytes.len() < 4 || !head_bytes[..4].eq_ignore_ascii_case(b"<div") {
        return None;
    }
    // The tag name must end here, otherwise this is e.g. `<divider>`.
    match head_bytes.get(4).copied() {
        Some(b' ' | b'\t' | b'\n' | b'>' | b'/') => {}
        _ => return None,
    }

    let close_gt_rel = head[4..].find('>')?;
    let open_attrs_raw = &head[4..4 + close_gt_rel];
    let open_attrs = open_attrs_raw.trim_matches(|c: char| c.is_whitespace() || c == '/');
    let attr = parse_html_attrs(open_attrs);

    let after_open_tag = leading_ws + 4 + close_gt_rel + 1;
    let multiline = bytes.get(after_open_tag).copied() == Some(b'\n');

    // Number of trailing spaces/tabs/newlines after the closing tag.
    let trailing_ws = bytes[after_open_tag..]
        .iter()
        .rev()
        .position(|&b| b != b' ' && b != b'\t' && b != b'\n')
        .unwrap_or(0);
    let close_end = content.len() - trailing_ws;
    let close_search = &content[after_open_tag..close_end];
    if !close_search.to_ascii_lowercase().ends_with("</div>") {
        return None;
    }
    let close_start = after_open_tag + close_search.len() - "</div>".len();
    let inner = content[after_open_tag..close_start].trim_matches('\n');

    let mut blocks = parse_pandoc_blocks(inner);
    // Check the shape before taking the block out: removing it first and
    // matching afterwards would silently drop a lone non-paragraph block.
    if !multiline && matches!(blocks.as_slice(), [Block::Para(_)]) {
        if let Some(Block::Para(inlines)) = blocks.pop() {
            blocks.push(Block::Plain(inlines));
        }
    }
    Some(Block::Div(attr, blocks))
}
/// Parses `text` with the Pandoc flavor's dialect and extensions and converts
/// the resulting top-level nodes into blocks. Whitespace-only input yields no
/// blocks without invoking the parser.
fn parse_pandoc_blocks(text: &str) -> Vec<Block> {
    let mut blocks = Vec::new();
    if text.trim().is_empty() {
        return blocks;
    }

    let opts = crate::ParserOptions {
        flavor: crate::Flavor::Pandoc,
        dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
        extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
        ..crate::ParserOptions::default()
    };
    let doc = crate::parse(text, Some(opts));
    doc.children()
        .for_each(|child| collect_block(&child, &mut blocks));
    blocks
}
/// Emits the node's text, minus trailing newlines, as a raw `tex` block.
fn tex_block(node: &SyntaxNode) -> Block {
    let text = node.text().to_string();
    let content = text.trim_end_matches('\n').to_string();
    Block::RawBlock("tex".to_string(), content)
}
fn fenced_div(node: &SyntaxNode) -> Block {
let attr = node
.children()
.find(|c| c.kind() == SyntaxKind::DIV_FENCE_OPEN)
.map(|open| {
let info = open
.children()
.find(|c| c.kind() == SyntaxKind::DIV_INFO)
.map(|n| n.text().to_string())
.unwrap_or_default();
parse_div_info(info.trim())
})
.unwrap_or_default();
let mut blocks = Vec::new();
for child in node.children() {
match child.kind() {
SyntaxKind::DIV_FENCE_OPEN | SyntaxKind::DIV_FENCE_CLOSE => {}
_ => collect_block(&child, &mut blocks),
}
}
Block::Div(attr, blocks)
}
/// Interprets a fenced div's info string: `{...}` is parsed as an attribute
/// block, any other non-empty text becomes a single class, and an empty string
/// gives default attributes.
fn parse_div_info(info: &str) -> Attr {
    if let Some(inner) = info.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
        return parse_attr_block(inner);
    }
    if info.is_empty() {
        return Attr::default();
    }
    Attr {
        id: String::new(),
        classes: vec![info.to_string()],
        kvs: Vec::new(),
    }
}
fn extract_attr_from_node(parent: &SyntaxNode) -> Attr {
let raw = parent.children_with_tokens().find_map(|el| match el {
NodeOrToken::Node(n) if n.kind() == SyntaxKind::ATTRIBUTE => Some(n.text().to_string()),
NodeOrToken::Token(t) if t.kind() == SyntaxKind::ATTRIBUTE => Some(t.text().to_string()),
_ => None,
});
let Some(raw) = raw else {
return Attr::default();
};
let trimmed = raw.trim();
if let Some(inner) = trimmed.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
parse_attr_block(inner)
} else {
Attr::default()
}
}
/// Parses the inside of a `{...}` attribute block (braces already removed).
///
/// Whitespace-separated items are recognised as follows:
/// * `#id` sets the identifier (a later `#id` overwrites an earlier one);
/// * `.class` adds a class;
/// * `key=value` or `key="value"` adds a key/value pair — a quoted value runs
///   to the next `"` (no escape handling), an unquoted one to the next whitespace;
/// * any other bare word is added as a class.
fn parse_attr_block(s: &str) -> Attr {
let mut id = String::new();
let mut classes: Vec<String> = Vec::new();
let mut kvs: Vec<(String, String)> = Vec::new();
// The scan is byte-based; every slice below starts and ends next to an ASCII
// delimiter or at the end of input.
let bytes = s.as_bytes();
let mut i = 0usize;
while i < bytes.len() {
match bytes[i] {
b' ' | b'\t' | b'\n' | b'\r' => {
i += 1;
}
b'#' => {
// Identifier: everything up to the next whitespace.
let start = i + 1;
let mut j = start;
while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
j += 1;
}
id = s[start..j].to_string();
i = j;
}
b'.' => {
// Class: everything up to the next whitespace.
let start = i + 1;
let mut j = start;
while j < bytes.len() && !matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
j += 1;
}
classes.push(s[start..j].to_string());
i = j;
}
_ => {
// Key (or bare word): runs up to whitespace or `=`.
let key_start = i;
while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
i += 1;
}
let key = s[key_start..i].to_string();
if i < bytes.len() && bytes[i] == b'=' {
i += 1;
let value = if i < bytes.len() && bytes[i] == b'"' {
i += 1;
let v_start = i;
while i < bytes.len() && bytes[i] != b'"' {
i += 1;
}
let v = s[v_start..i].to_string();
// Step over the closing quote when there is one.
if i < bytes.len() {
i += 1;
}
v
} else {
let v_start = i;
while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
i += 1;
}
s[v_start..i].to_string()
};
kvs.push((key, value));
} else if !key.is_empty() {
// A word without `=` is treated as a class name.
classes.push(key);
}
}
}
}
Attr { id, classes, kvs }
}
/// Parses the attribute portion of an HTML opening tag.
///
/// Attributes are whitespace-separated `key`, `key=value`, `key="value"` or
/// `key='value'` items. `id` sets the identifier, `class` is split on ASCII
/// whitespace into classes, and every other key is kept as a key/value pair
/// (with an empty value when no `=` follows the key).
fn parse_html_attrs(s: &str) -> Attr {
let mut id = String::new();
let mut classes: Vec<String> = Vec::new();
let mut kvs: Vec<(String, String)> = Vec::new();
// Byte-based scan; slices start and end next to ASCII delimiters or at the
// end of input.
let bytes = s.as_bytes();
let mut i = 0usize;
while i < bytes.len() {
match bytes[i] {
b' ' | b'\t' | b'\n' | b'\r' => {
i += 1;
}
_ => {
// Key: runs up to whitespace or `=`.
let key_start = i;
while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b'=') {
i += 1;
}
let key = s[key_start..i].to_string();
let value = if i < bytes.len() && bytes[i] == b'=' {
i += 1;
if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'') {
// Quoted value: ends at the matching quote character.
let quote = bytes[i];
i += 1;
let v_start = i;
while i < bytes.len() && bytes[i] != quote {
i += 1;
}
let v = s[v_start..i].to_string();
// Step over the closing quote when there is one.
if i < bytes.len() {
i += 1;
}
v
} else {
let v_start = i;
while i < bytes.len() && !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
i += 1;
}
s[v_start..i].to_string()
}
} else {
String::new()
};
// An `=` with no key in front of it: the value was consumed above and is dropped.
if key.is_empty() {
continue;
}
match key.as_str() {
"class" => {
for c in value.split_ascii_whitespace() {
classes.push(c.to_string());
}
}
"id" => id = value,
_ => kvs.push((key, value)),
}
}
}
}
Attr { id, classes, kvs }
}
/// Converts a definition list: each `DEFINITION_ITEM` child contributes its
/// term inlines and one block list per `DEFINITION` child.
fn definition_list(node: &SyntaxNode) -> Block {
    let mut items: Vec<(Vec<Inline>, Vec<Vec<Block>>)> = Vec::new();

    for item in node
        .children()
        .filter(|c| c.kind() == SyntaxKind::DEFINITION_ITEM)
    {
        let term = match item.children().find(|c| c.kind() == SyntaxKind::TERM) {
            Some(t) => coalesce_inlines(inlines_from(&t)),
            None => Vec::new(),
        };
        let loose = is_loose_definition_item(&item);

        let mut defs: Vec<Vec<Block>> = Vec::new();
        for def in item
            .children()
            .filter(|c| c.kind() == SyntaxKind::DEFINITION)
        {
            defs.push(definition_blocks(&def, loose));
        }
        items.push((term, defs));
    }

    Block::DefinitionList(items)
}
/// Reports whether a blank line separates the term from the first definition.
///
/// Child nodes are scanned in order: a `BLANK_LINE` after a `TERM` means
/// loose, while reaching a `DEFINITION` first (even before any term) means tight.
fn is_loose_definition_item(item: &SyntaxNode) -> bool {
    let mut after_term = false;
    for child in item.children() {
        let kind = child.kind();
        if kind == SyntaxKind::DEFINITION {
            return false;
        }
        if kind == SyntaxKind::TERM {
            after_term = true;
        } else if kind == SyntaxKind::BLANK_LINE && after_term {
            return true;
        }
    }
    false
}
/// Converts the children of one definition.
///
/// `PLAIN` children become `Para` in a loose item and `Plain` otherwise,
/// `PARAGRAPH` children always become `Para`, and code blocks have the
/// definition's content offset stripped when that offset is non-zero.
/// Everything else goes through `collect_block`.
fn definition_blocks(def_node: &SyntaxNode, loose: bool) -> Vec<Block> {
    let extra = definition_content_offset(def_node);
    let mut blocks = Vec::new();

    for child in def_node.children() {
        let kind = child.kind();
        if kind == SyntaxKind::PLAIN || kind == SyntaxKind::PARAGRAPH {
            let inlines = coalesce_inlines(inlines_from(&child));
            let as_para = kind == SyntaxKind::PARAGRAPH || loose;
            blocks.push(if as_para {
                Block::Para(inlines)
            } else {
                Block::Plain(inlines)
            });
        } else if kind == SyntaxKind::CODE_BLOCK && extra > 0 {
            blocks.push(indented_code_block_with_extra_strip(&child, extra));
        } else {
            collect_block(&child, &mut blocks);
        }
    }
    blocks
}
/// Computes the column at which a definition's content starts.
///
/// The width of the `DEFINITION_MARKER` token(s) is accumulated; a
/// `WHITESPACE` token directly after the marker is added on top. The scan
/// stops at the first other element after the marker. Without a marker the
/// result is 0.
fn definition_content_offset(def_node: &SyntaxNode) -> usize {
    let mut col = 0usize;
    let mut after_marker = false;

    for el in def_node.children_with_tokens() {
        let token = match el {
            NodeOrToken::Token(t) => t,
            NodeOrToken::Node(_) if after_marker => return col,
            NodeOrToken::Node(_) => continue,
        };

        let kind = token.kind();
        if kind == SyntaxKind::DEFINITION_MARKER {
            col = advance_col(col, token.text());
            after_marker = true;
        } else if after_marker {
            return if kind == SyntaxKind::WHITESPACE {
                advance_col(col, token.text())
            } else {
                col
            };
        }
    }
    col
}
/// Returns the column reached after writing `s` starting at column `start`.
/// A tab jumps to the next multiple of four; any other character advances by one.
fn advance_col(start: usize, s: &str) -> usize {
    s.chars().fold(start, |col, ch| {
        if ch == '\t' {
            col - col % 4 + 4
        } else {
            col + 1
        }
    })
}
/// Converts a line block: every `LINE_BLOCK_LINE` child yields one inline
/// sequence, with the line marker and newline tokens left out.
fn line_block(node: &SyntaxNode) -> Block {
    let mut lines: Vec<Vec<Inline>> = Vec::new();

    for line in node
        .children()
        .filter(|c| c.kind() == SyntaxKind::LINE_BLOCK_LINE)
    {
        let mut inlines = Vec::new();
        for el in line.children_with_tokens() {
            match el {
                NodeOrToken::Node(n) => inlines.push(inline_from_node(&n)),
                NodeOrToken::Token(t)
                    if matches!(
                        t.kind(),
                        SyntaxKind::LINE_BLOCK_MARKER | SyntaxKind::NEWLINE
                    ) => {}
                NodeOrToken::Token(t) => push_token_inline(&t, &mut inlines),
            }
        }
        lines.push(coalesce_inlines(inlines));
    }

    Block::LineBlock(lines)
}
/// Emits the node's full text as a raw `tex` inline.
fn latex_command_inline(node: &SyntaxNode) -> Inline {
    Inline::RawInline("tex".to_string(), node.text().to_string())
}
/// Converts a bracketed span node into `Inline::Span`.
///
/// The span counts as HTML-style when a `SPAN_BRACKET_OPEN` token starts with
/// `<`; its attributes are then parsed with `parse_html_attrs`. Otherwise the
/// attribute text must be wrapped in `{...}` and is parsed with
/// `parse_attr_block`. Missing or unrecognised attributes yield the default
/// `Attr`. The content comes from the first `SPAN_CONTENT` child, if any.
fn bracketed_span_inline(node: &SyntaxNode) -> Inline {
    let mut html_style = false;
    let mut raw_attrs: Option<String> = None;
    for element in node.children_with_tokens() {
        match element {
            NodeOrToken::Token(token) => {
                let kind = token.kind();
                if kind == SyntaxKind::SPAN_BRACKET_OPEN {
                    if token.text().starts_with('<') {
                        html_style = true;
                    }
                } else if kind == SyntaxKind::SPAN_ATTRIBUTES && raw_attrs.is_none() {
                    // Only the first attribute element is used.
                    raw_attrs = Some(token.text().to_string());
                }
            }
            NodeOrToken::Node(child) => {
                if child.kind() == SyntaxKind::SPAN_ATTRIBUTES && raw_attrs.is_none() {
                    raw_attrs = Some(child.text().to_string());
                }
            }
        }
    }

    let attr = match raw_attrs.as_deref().map(str::trim) {
        None => Attr::default(),
        Some(text) if html_style => parse_html_attrs(text),
        Some(text) => match text.strip_prefix('{').and_then(|rest| rest.strip_suffix('}')) {
            Some(inner) => parse_attr_block(inner),
            None => Attr::default(),
        },
    };

    let content = match node
        .children()
        .find(|child| child.kind() == SyntaxKind::SPAN_CONTENT)
    {
        Some(inner) => coalesce_inlines(inlines_from(&inner)),
        None => Default::default(),
    };
    Inline::Span(attr, content)
}
/// Converts a pipe-table node into `TableData`.
///
/// The column count is the widest of the header, any body row and the
/// separator's alignment list; short alignment lists are padded with
/// `AlignDefault`. Returns `None` when the table has no columns at all.
/// Pipe tables carry no explicit widths, so every width is `None`.
fn pipe_table(node: &SyntaxNode) -> Option<TableData> {
    let mut header: Vec<Vec<Inline>> = Vec::new();
    let mut rows: Vec<Vec<Vec<Inline>>> = Vec::new();
    let mut aligns: Vec<&'static str> = Vec::new();
    let mut caption: Vec<Inline> = Vec::new();

    for part in node.children() {
        let kind = part.kind();
        if kind == SyntaxKind::TABLE_HEADER {
            header = pipe_table_cells(&part);
        } else if kind == SyntaxKind::TABLE_SEPARATOR {
            aligns = pipe_separator_aligns(&part.text().to_string());
        } else if kind == SyntaxKind::TABLE_ROW {
            rows.push(pipe_table_cells(&part));
        } else if kind == SyntaxKind::TABLE_CAPTION {
            caption = pipe_table_caption(&part);
        }
    }

    let widest_row = rows.iter().map(|row| row.len()).max().unwrap_or(0);
    let cols = header.len().max(widest_row).max(aligns.len());
    if cols == 0 {
        return None;
    }
    // `cols` is never smaller than `aligns.len()`, so this can only pad.
    aligns.resize(cols, "AlignDefault");

    let mut head_rows = Vec::new();
    if !header.is_empty() {
        head_rows.push(cells_to_plain_blocks(header, cols));
    }
    let mut body_rows: Vec<Vec<GridCell>> = Vec::with_capacity(rows.len());
    for row in rows {
        body_rows.push(cells_to_plain_blocks(row, cols));
    }

    let (attr, caption) = extract_caption_attrs(caption);
    Some(TableData {
        attr,
        caption,
        aligns,
        widths: vec![None; cols],
        head_rows,
        body_rows,
        foot_rows: Vec::new(),
    })
}
/// Returns the coalesced inline content of every `TABLE_CELL` child of `row`.
fn pipe_table_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
    let mut cells = Vec::new();
    for child in row.children() {
        if child.kind() == SyntaxKind::TABLE_CELL {
            cells.push(coalesce_inlines(inlines_from(&child)));
        }
    }
    cells
}
/// Splits a trailing `{...}` attribute block off a table caption.
///
/// Returns the parsed attributes together with the caption inlines that
/// precede the block (minus one trailing `Space`). When no well-formed
/// attribute block terminates the caption, the default `Attr` and the
/// untouched inlines are returned.
///
/// The block is recognised only if it is made of `Str`/`Space` inlines, its
/// first `Str` starts with `{`, its last `Str` ends with `}`, and nothing but
/// whitespace follows it. Without that last requirement a brace group in the
/// middle of a caption would be taken as attributes and every inline after it
/// would be silently discarded by the truncation below.
fn extract_caption_attrs(mut inlines: Vec<Inline>) -> (Attr, Vec<Inline>) {
    let last_str_end = inlines
        .iter()
        .rposition(|i| matches!(i, Inline::Str(s) if s.ends_with('}')));
    let Some(end_idx) = last_str_end else {
        return (Attr::default(), inlines);
    };

    // Attributes must close the caption: anything other than whitespace after
    // the closing brace means this is ordinary caption text.
    let has_trailing_content = inlines[end_idx + 1..]
        .iter()
        .any(|i| !matches!(i, Inline::Space | Inline::SoftBreak));
    if has_trailing_content {
        return (Attr::default(), inlines);
    }

    // Walk backwards over Str/Space inlines to the `Str` that opens the block.
    let mut open_idx = None;
    for idx in (0..=end_idx).rev() {
        match &inlines[idx] {
            Inline::Str(s) if s.starts_with('{') => {
                open_idx = Some(idx);
                break;
            }
            Inline::Str(_) | Inline::Space => {}
            _ => break,
        }
    }
    let Some(start_idx) = open_idx else {
        return (Attr::default(), inlines);
    };

    // The scan above guarantees this range holds only `Str` and `Space`.
    let mut raw = String::new();
    for el in &inlines[start_idx..=end_idx] {
        match el {
            Inline::Str(s) => raw.push_str(s),
            _ => raw.push(' '),
        }
    }
    let Some(inner) = raw.strip_prefix('{').and_then(|rest| rest.strip_suffix('}')) else {
        return (Attr::default(), inlines);
    };
    let attr = parse_attr_block(inner);

    inlines.truncate(start_idx);
    if matches!(inlines.last(), Some(Inline::Space)) {
        inlines.pop();
    }
    (attr, inlines)
}
/// Collects the inline content of a table caption node.
///
/// Everything up to and including the first `TABLE_CAPTION_PREFIX` element is
/// skipped (as is any later prefix element); the remaining nodes and tokens
/// are converted in order and coalesced.
fn pipe_table_caption(node: &SyntaxNode) -> Vec<Inline> {
    let mut inlines = Vec::new();
    let mut past_prefix = false;
    for element in node.children_with_tokens() {
        let kind = match &element {
            NodeOrToken::Node(child) => child.kind(),
            NodeOrToken::Token(token) => token.kind(),
        };
        if kind == SyntaxKind::TABLE_CAPTION_PREFIX {
            past_prefix = true;
            continue;
        }
        if !past_prefix {
            continue;
        }
        match element {
            NodeOrToken::Node(child) => inlines.push(inline_from_node(&child)),
            NodeOrToken::Token(token) => push_token_inline(&token, &mut inlines),
        }
    }
    coalesce_inlines(inlines)
}
/// Derives one alignment name per column from a pipe-table separator line.
///
/// Outer pipes and surrounding whitespace are stripped, the remainder is
/// split on `|`, and each segment is classified by whether it starts and/or
/// ends with a colon.
fn pipe_separator_aligns(raw: &str) -> Vec<&'static str> {
    let body = raw
        .trim()
        .trim_start_matches('|')
        .trim_end_matches('|');
    let mut aligns = Vec::new();
    for segment in body.split('|') {
        let segment = segment.trim();
        let align = if segment.starts_with(':') {
            if segment.ends_with(':') {
                "AlignCenter"
            } else {
                "AlignLeft"
            }
        } else if segment.ends_with(':') {
            "AlignRight"
        } else {
            "AlignDefault"
        };
        aligns.push(align);
    }
    aligns
}
/// Wraps each cell's inlines in a span-less `GridCell` and pads the row.
///
/// Non-empty cells hold a single `Block::Plain`; empty cells hold no blocks.
/// Rows shorter than `cols` are padded with empty cells; longer rows are left
/// as they are.
fn cells_to_plain_blocks(cells: Vec<Vec<Inline>>, cols: usize) -> Vec<GridCell> {
    let mut row: Vec<GridCell> = Vec::with_capacity(cells.len().max(cols));
    for inlines in cells {
        let mut blocks = Vec::new();
        if !inlines.is_empty() {
            blocks.push(Block::Plain(inlines));
        }
        row.push(GridCell::no_span(blocks));
    }
    // The range is empty when the row already has at least `cols` cells.
    for _ in row.len()..cols {
        row.push(GridCell::no_span(Vec::new()));
    }
    row
}
/// Formats a float so that it always carries a decimal point.
///
/// Zero is `0.0`. Magnitudes in `[0.1, 1e7)` use plain decimal notation with
/// `.0` appended when needed; everything else uses exponent notation with the
/// mantissa forced to contain a decimal point (for example `1.0e7`, `5.0e-2`).
fn show_double(x: f64) -> String {
    if x == 0.0 {
        return String::from("0.0");
    }
    let magnitude = x.abs();
    let fixed_notation = magnitude >= 0.1 && magnitude < 1e7;
    if !fixed_notation {
        let sci = format!("{x:e}");
        return match sci.find('e') {
            // Integral mantissa such as `1e7`: insert the `.0`.
            Some(pos) if !sci[..pos].contains('.') => {
                format!("{}.0e{}", &sci[..pos], &sci[pos + 1..])
            }
            _ => sci,
        };
    }
    let mut plain = x.to_string();
    if !(plain.contains('.') || plain.contains('e')) {
        plain.push_str(".0");
    }
    plain
}
fn simple_table(node: &SyntaxNode) -> Option<TableData> {
let separator = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)?;
let cols = simple_table_dash_runs(&separator);
if cols.is_empty() {
return None;
}
let header = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
let mut body_rows_nodes: Vec<SyntaxNode> = node
.children()
.filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
.collect();
if header.is_none()
&& body_rows_nodes
.last()
.map(simple_table_row_is_all_dashes)
.unwrap_or(false)
{
body_rows_nodes.pop();
}
let aligns = if let Some(h) = &header {
simple_table_aligns(h, &cols)
} else if let Some(r0) = body_rows_nodes.first() {
simple_table_aligns(r0, &cols)
} else {
vec!["AlignDefault"; cols.len()]
};
let head_rows = match &header {
Some(h) => {
let cells: Vec<Vec<Inline>> = simple_table_row_cells(h);
vec![cells_to_plain_blocks(cells, cols.len())]
}
None => Vec::new(),
};
let body_rows: Vec<Vec<GridCell>> = body_rows_nodes
.iter()
.map(|r| cells_to_plain_blocks(simple_table_row_cells(r), cols.len()))
.collect();
let caption_inlines = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
.map(|n| pipe_table_caption(&n))
.unwrap_or_default();
let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
Some(TableData {
attr,
caption: caption_inlines,
aligns,
widths: vec![None; cols.len()],
head_rows,
body_rows,
foot_rows: Vec::new(),
})
}
/// Locates the runs of `-` in a separator line.
///
/// Returns inclusive `(first, last)` byte offsets, relative to the start of
/// the separator text, for each maximal run of dashes. Trailing line-break
/// characters are ignored.
fn simple_table_dash_runs(separator: &SyntaxNode) -> Vec<(usize, usize)> {
    let text = separator.text().to_string();
    let line = text.trim_end_matches(['\n', '\r']);
    let mut runs = Vec::new();
    let mut open: Option<usize> = None;
    for (idx, ch) in line.char_indices() {
        match (ch == '-', open) {
            (true, None) => open = Some(idx),
            (false, Some(begin)) => {
                runs.push((begin, idx - 1));
                open = None;
            }
            _ => {}
        }
    }
    // A run still open here extends to the end of the line.
    if let Some(begin) = open {
        runs.push((begin, line.len() - 1));
    }
    runs
}
/// Returns the coalesced inlines of each `TABLE_CELL` child of a simple-table row.
fn simple_table_row_cells(row: &SyntaxNode) -> Vec<Vec<Inline>> {
    row.children()
        .filter_map(|child| {
            if child.kind() == SyntaxKind::TABLE_CELL {
                Some(coalesce_inlines(inlines_from(&child)))
            } else {
                None
            }
        })
        .collect()
}
/// Reports whether a row is nothing but dashes.
///
/// Blank cells are ignored. The row qualifies when it has at least one
/// non-blank cell and every non-blank cell consists solely of `-`.
fn simple_table_row_is_all_dashes(row: &SyntaxNode) -> bool {
    let mut dash_cells = 0usize;
    for cell in row.children() {
        if cell.kind() != SyntaxKind::TABLE_CELL {
            continue;
        }
        let text = cell.text().to_string();
        let content = text.trim();
        if content.is_empty() {
            continue;
        }
        if content.chars().any(|c| c != '-') {
            return false;
        }
        dash_cells += 1;
    }
    dash_cells > 0
}
/// Infers column alignments by comparing cell text with the dash runs.
///
/// For each non-blank `TABLE_CELL` in `row` the visible span (offsets relative
/// to the row start, with leading/trailing spaces and tabs excluded) is
/// computed. Each column in `cols` is matched with the first cell whose span
/// overlaps it: flush on both edges or no cell at all gives `AlignDefault`,
/// flush left only gives `AlignLeft`, flush right only gives `AlignRight`,
/// and flush on neither edge gives `AlignCenter`.
fn simple_table_aligns(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<&'static str> {
    fn is_padding(c: &char) -> bool {
        *c == ' ' || *c == '\t'
    }

    let row_start: u32 = row.text_range().start().into();
    let spans: Vec<(usize, usize)> = row
        .children()
        .filter(|c| c.kind() == SyntaxKind::TABLE_CELL && !c.text_range().is_empty())
        .filter_map(|cell| {
            let text = cell.text().to_string();
            let lead = text.chars().take_while(is_padding).count();
            let trail = text.chars().rev().take_while(is_padding).count();
            // Saturates to zero for cells that are entirely padding.
            let visible = text.chars().count().saturating_sub(lead + trail);
            if visible == 0 {
                return None;
            }
            let cell_start: u32 = cell.text_range().start().into();
            let first = (cell_start - row_start) as usize + lead;
            Some((first, first + visible - 1))
        })
        .collect();

    cols.iter()
        .map(|&(col_start, col_end)| {
            let hit = spans
                .iter()
                .find(|&&(first, last)| last >= col_start && first <= col_end);
            match hit {
                None => "AlignDefault",
                Some(&(first, last)) => match (first == col_start, last == col_end) {
                    (true, true) => "AlignDefault",
                    (true, false) => "AlignLeft",
                    (false, true) => "AlignRight",
                    (false, false) => "AlignCenter",
                },
            }
        })
        .collect()
}
/// Converts a grid-table node into `TableData`.
///
/// The table source is laid out as a rectangular character grid. Lines made
/// only of border characters (and containing a `+`) are separator lines; the
/// `+` positions on them define the column boundaries, and consecutive
/// separator lines delimit the rows. Cells, including spanning ones, are then
/// discovered with `find_grid_cell` and their text is re-parsed into blocks.
///
/// Returns `None` when there are no lines, fewer than two column boundaries,
/// fewer than two separator lines, or no `TABLE_SEPARATOR` child to take
/// widths and alignments from.
#[allow(clippy::needless_range_loop)]
fn grid_table(node: &SyntaxNode) -> Option<TableData> {
// Every non-caption child contributes its lines, tagged with the child's kind
// so that header/footer membership can be recovered per line later on.
let mut tagged: Vec<(SyntaxKind, String)> = Vec::new();
for child in node.children() {
if child.kind() == SyntaxKind::TABLE_CAPTION {
continue;
}
let text = child.text().to_string();
for line in text.split_inclusive('\n') {
let trimmed = line.trim_end_matches('\n');
tagged.push((child.kind(), trimmed.to_string()));
}
}
if tagged.is_empty() {
return None;
}
// Pad every line with spaces to the longest line (measured in chars) so the
// grid can be indexed as grid[line][column] without bounds surprises.
let max_width = tagged
.iter()
.map(|(_, l)| l.chars().count())
.max()
.unwrap_or(0);
let grid: Vec<Vec<char>> = tagged
.iter()
.map(|(_, l)| {
let mut chars: Vec<char> = l.chars().collect();
chars.resize(max_width, ' ');
chars
})
.collect();
let nlines = grid.len();
// A separator line contains at least one '+' and nothing but border
// characters and spaces.
let is_sep_line: Vec<bool> = grid
.iter()
.map(|row| {
row.contains(&'+')
&& row
.iter()
.all(|&c| matches!(c, '+' | '-' | '=' | ':' | '|' | ' '))
})
.collect();
// Column boundaries: every position holding a '+' on any separator line,
// in ascending order.
let mut col_set: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
for (i, row) in grid.iter().enumerate() {
if !is_sep_line[i] {
continue;
}
for (j, &c) in row.iter().enumerate() {
if c == '+' {
col_set.insert(j);
}
}
}
let cols_pos: Vec<usize> = col_set.into_iter().collect();
if cols_pos.len() < 2 {
return None;
}
let ncols = cols_pos.len() - 1;
// Row boundaries: the line indices of the separator lines.
let row_seps: Vec<usize> = (0..nlines).filter(|&i| is_sep_line[i]).collect();
if row_seps.len() < 2 {
return None;
}
let nrows = row_seps.len() - 1;
// Classify each grid row as head/foot/body from the kinds of the content
// lines between its two separators; when both kinds occur the last one wins.
let mut block_kind: Vec<&'static str> = vec!["body"; nrows];
for r in 0..nrows {
let start = row_seps[r];
let end = row_seps[r + 1];
for i in (start + 1)..end {
match tagged[i].0 {
SyntaxKind::TABLE_HEADER => block_kind[r] = "head",
SyntaxKind::TABLE_FOOTER => block_kind[r] = "foot",
_ => {}
}
}
}
// Discover cells in row-major order. `occupied` marks grid slots already
// covered by an earlier (possibly spanning) cell.
// Cell tuple layout: (start row, start col, row span, col span, text).
let mut occupied = vec![vec![false; ncols]; nrows];
let mut cells: Vec<(usize, usize, u32, u32, String)> = Vec::new();
for sr in 0..nrows {
for sc in 0..ncols {
if occupied[sr][sc] {
continue;
}
let i = row_seps[sr];
let j = cols_pos[sc];
// A cell can only start where its top-left corner is a '+'.
if grid[i][j] != '+' {
continue;
}
let Some((er, ec, content)) = find_grid_cell(&grid, i, j, sr, sc, &cols_pos, &row_seps)
else {
continue;
};
// `er`/`ec` are the boundary indices closing the cell, so the differences
// are the spans.
let row_span = (er - sr) as u32;
let col_span = (ec - sc) as u32;
for r in sr..er {
for c in sc..ec {
occupied[r][c] = true;
}
}
cells.push((sr, sc, row_span, col_span, content));
}
}
// Group cells by the grid row they start in, ordered by start column, and
// route each row into head, body or foot.
let mut head_rows: Vec<Vec<GridCell>> = Vec::new();
let mut body_rows: Vec<Vec<GridCell>> = Vec::new();
let mut foot_rows: Vec<Vec<GridCell>> = Vec::new();
for r in 0..nrows {
let mut row_cells: Vec<&(usize, usize, u32, u32, String)> =
cells.iter().filter(|(sr, _, _, _, _)| *sr == r).collect();
row_cells.sort_by_key(|(_, sc, _, _, _)| *sc);
let row: Vec<GridCell> = row_cells
.into_iter()
.map(|(_, _, rs, cs, text)| {
let blocks = parse_grid_cell_text(text);
GridCell {
row_span: *rs,
col_span: *cs,
blocks,
}
})
.collect();
match block_kind[r] {
"head" => head_rows.push(row),
"foot" => foot_rows.push(row),
_ => body_rows.push(row),
}
}
// Widths and alignments come from the first separator containing ':' or,
// failing that, from the first separator. With no separator child at all the
// `?` makes the whole conversion return None.
let alignment_sep = node
.children()
.filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
.find(|c| c.text().to_string().contains(':'))
.or_else(|| {
node.children()
.find(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
})?;
let widths = grid_dash_widths(&alignment_sep);
let aligns_raw = alignment_sep.text().to_string();
let aligns = if aligns_raw.contains(':') {
grid_separator_aligns(&aligns_raw, ncols)
} else {
vec!["AlignDefault"; ncols]
};
let caption_inlines = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
.map(|n| pipe_table_caption(&n))
.unwrap_or_default();
let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
Some(TableData {
attr,
caption: caption_inlines,
aligns,
widths: widths.into_iter().map(Some).collect(),
head_rows,
body_rows,
foot_rows,
})
}
/// Finds the cell whose top-left corner is at grid position (`i`, `j`).
///
/// `sr`/`sc` are the indices of that corner within `row_seps`/`cols_pos`.
/// Candidate rectangles are tried with increasing right boundary `ec` and,
/// for each, increasing bottom boundary `er`. The first rectangle whose four
/// edges are drawn with border characters, whose bottom corners are `+`, and
/// which is not cut in two by a full-width interior border is returned as
/// `(er, ec, content)`, where `content` is the text enclosed by the rectangle.
/// Returns `None` when no rectangle qualifies.
///
/// NOTE(review): `row_seps.len() - 1` and `cols_pos.len() - 1` underflow on
/// empty slices; the caller visible in this file only calls this after
/// checking that both have at least two entries.
#[allow(clippy::needless_range_loop)]
fn find_grid_cell(
grid: &[Vec<char>],
i: usize,
j: usize,
sr: usize,
sc: usize,
cols_pos: &[usize],
row_seps: &[usize],
) -> Option<(usize, usize, String)> {
let nrows = row_seps.len() - 1;
let ncols = cols_pos.len() - 1;
for ec in (sc + 1)..=ncols {
let k = cols_pos[ec];
// The top edge must be an unbroken border up to this boundary; once it is
// broken no wider candidate can work either, hence `break`.
let top_ok = (j + 1..k).all(|c| matches!(grid[i][c], '-' | '=' | ':' | '+'));
if !top_ok {
break;
}
for er in (sr + 1)..=nrows {
let l = row_seps[er];
// Same reasoning for the left edge: a gap rules out every taller candidate.
let left_ok = (i + 1..l).all(|r| matches!(grid[r][j], '|' | '+'));
if !left_ok {
break;
}
// A failing right edge, bottom edge or bottom corner only rules out this
// height, so the search moves on to the next bottom boundary.
let right_ok = (i + 1..l).all(|r| matches!(grid[r][k], '|' | '+'));
if !right_ok {
continue;
}
let bot_ok = (j + 1..k).all(|c| matches!(grid[l][c], '-' | '=' | ':' | '+'));
if !bot_ok {
continue;
}
if grid[l][j] != '+' || grid[l][k] != '+' {
continue;
}
// Reject a rectangle that has a complete horizontal border strictly inside
// it (corners on both sides, border characters in between).
let interior_split = (i + 1..l).any(|m| {
grid[m][j] == '+'
&& grid[m][k] == '+'
&& (j + 1..k).all(|c| matches!(grid[m][c], '-' | '=' | ':' | '+'))
});
if interior_split {
continue;
}
// Extract the interior text line by line: drop one leading space of padding
// and all trailing whitespace.
let mut content_lines: Vec<String> = Vec::new();
for r in (i + 1)..l {
let slice: String = grid[r][j + 1..k].iter().collect();
let stripped = slice.strip_prefix(' ').unwrap_or(&slice).to_string();
content_lines.push(stripped.trim_end().to_string());
}
// Trim leading and trailing empty lines, keeping interior ones.
let first = content_lines.iter().position(|s| !s.is_empty());
let last = content_lines.iter().rposition(|s| !s.is_empty());
let content = match (first, last) {
(Some(f), Some(l)) => content_lines[f..=l].join("\n"),
_ => String::new(),
};
return Some((er, ec, content));
}
}
None
}
/// Parses the text of one grid-table cell into blocks.
///
/// Blank text yields no blocks. Otherwise the text is parsed as a standalone
/// Pandoc-flavoured document and each top-level child that converts to a block
/// is kept, with paragraphs downgraded to `Block::Plain`.
fn parse_grid_cell_text(text: &str) -> Vec<Block> {
    if text.trim().is_empty() {
        return Vec::new();
    }
    let opts = crate::ParserOptions {
        flavor: crate::Flavor::Pandoc,
        dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
        extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
        ..crate::ParserOptions::default()
    };
    crate::parse(text, Some(opts))
        .children()
        .filter_map(|child| block_from(&child))
        .map(|block| match block {
            Block::Para(inlines) => Block::Plain(inlines),
            other => other,
        })
        .collect()
}
/// Computes relative column widths from a grid-table separator line.
///
/// Each stretch between two consecutive `+` characters counts as its length
/// in characters plus one. The widths are divided by the sum of those counts
/// plus the number of columns minus two, but never by less than 72. Returns
/// an empty vector when the line has fewer than two `+`.
fn grid_dash_widths(separator: &SyntaxNode) -> Vec<f64> {
    let text = separator.text().to_string();
    let line = text.trim_end_matches(['\n', '\r']);

    // Splitting on '+' leaves the text before the first '+' at the front and
    // the text after the last '+' at the back; neither is a column.
    let mut segments = line.split('+');
    segments.next();
    segments.next_back();
    let spans: Vec<usize> = segments.map(|seg| seg.chars().count() + 1).collect();
    if spans.is_empty() {
        return Vec::new();
    }

    let total: usize = spans.iter().sum();
    let norm = (total + spans.len()).saturating_sub(2).max(72) as f64;
    spans.into_iter().map(|w| w as f64 / norm).collect()
}
fn grid_separator_aligns(raw: &str, cols: usize) -> Vec<&'static str> {
let line = raw.trim_end_matches(['\n', '\r']);
let mut aligns: Vec<&'static str> = Vec::with_capacity(cols);
let mut col_start: Option<usize> = None;
for (i, ch) in line.char_indices() {
if ch == '+' {
if let Some(s) = col_start.take() {
let seg = &line[s..i];
aligns.push(grid_segment_align(seg));
}
col_start = Some(i + 1);
}
}
while aligns.len() < cols {
aligns.push("AlignDefault");
}
aligns.truncate(cols);
aligns
}
/// Classifies one separator segment by its leading and trailing colon.
fn grid_segment_align(seg: &str) -> &'static str {
    if seg.starts_with(':') {
        if seg.ends_with(':') {
            "AlignCenter"
        } else {
            "AlignLeft"
        }
    } else if seg.ends_with(':') {
        "AlignRight"
    } else {
        "AlignDefault"
    }
}
fn multiline_table(node: &SyntaxNode) -> Option<TableData> {
let separators: Vec<SyntaxNode> = node
.children()
.filter(|c| c.kind() == SyntaxKind::TABLE_SEPARATOR)
.collect();
let header = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_HEADER);
let column_sep = if header.is_some() {
separators.get(1).cloned()
} else {
separators.first().cloned()
}?;
let cols = simple_table_dash_runs(&column_sep);
if cols.is_empty() {
return None;
}
let raw: Vec<usize> = cols
.iter()
.enumerate()
.map(|(i, (s, e))| {
if i + 1 < cols.len() {
cols[i + 1].0 - s
} else {
e - s + 2
}
})
.collect();
let total: usize = raw.iter().sum();
let norm = (total.max(72)) as f64;
let widths: Vec<f64> = raw.into_iter().map(|w| w as f64 / norm).collect();
let aligns = if let Some(h) = &header {
simple_table_aligns(h, &cols)
} else if let Some(r0) = node.children().find(|c| c.kind() == SyntaxKind::TABLE_ROW) {
simple_table_aligns(&r0, &cols)
} else {
vec!["AlignDefault"; cols.len()]
};
let head_rows = match &header {
Some(h) => vec![
multiline_row_cells_blocks(h, &cols)
.into_iter()
.map(GridCell::no_span)
.collect(),
],
None => Vec::new(),
};
let body_rows: Vec<Vec<GridCell>> = node
.children()
.filter(|c| c.kind() == SyntaxKind::TABLE_ROW)
.map(|r| {
multiline_row_cells_blocks(&r, &cols)
.into_iter()
.map(GridCell::no_span)
.collect()
})
.collect();
let caption_inlines = node
.children()
.find(|c| c.kind() == SyntaxKind::TABLE_CAPTION)
.map(|n| pipe_table_caption(&n))
.unwrap_or_default();
let (attr, caption_inlines) = extract_caption_attrs(caption_inlines);
Some(TableData {
attr,
caption: caption_inlines,
aligns,
widths: widths.into_iter().map(Some).collect(),
head_rows,
body_rows,
foot_rows: Vec::new(),
})
}
/// Splits a multiline-table row into per-column blocks.
///
/// Every non-blank source line of `row` is cut at the inclusive column ranges
/// in `cols` (character positions within the line). The trimmed, non-empty
/// pieces of each column are joined with newlines and parsed as inline
/// content. A column yields a single `Block::Plain`, or no blocks when it has
/// no text or its text parses to nothing.
///
/// Returns one entry per column, in column order.
fn multiline_row_cells_blocks(row: &SyntaxNode, cols: &[(usize, usize)]) -> Vec<Vec<Block>> {
    let raw = row.text().to_string();
    let mut col_lines: Vec<Vec<String>> = vec![Vec::new(); cols.len()];

    for line in raw.split_inclusive('\n') {
        let line_no_nl = line.trim_end_matches('\n');
        if line_no_nl.trim().is_empty() {
            continue;
        }
        for (segments, &(cs, ce)) in col_lines.iter_mut().zip(cols) {
            // `ce` is inclusive, while `char_slice` takes an exclusive end.
            let piece = char_slice(line_no_nl, cs, ce + 1).trim();
            if !piece.is_empty() {
                segments.push(piece.to_string());
            }
        }
    }

    col_lines
        .into_iter()
        .map(|segments| {
            if segments.is_empty() {
                return Vec::new();
            }
            let inlines = parse_cell_text_inlines(&segments.join("\n"));
            if inlines.is_empty() {
                Vec::new()
            } else {
                vec![Block::Plain(coalesce_inlines(inlines))]
            }
        })
        .collect()
}
/// Parses cell text and returns the inlines of its first paragraph.
///
/// Blank text yields nothing. Otherwise the text is parsed as a standalone
/// Pandoc-flavoured document, and the inlines of the first `PARAGRAPH` or
/// `PLAIN` node found among its descendants are returned (empty when there is
/// no such node).
fn parse_cell_text_inlines(text: &str) -> Vec<Inline> {
    if text.trim().is_empty() {
        return Vec::new();
    }
    let opts = crate::ParserOptions {
        flavor: crate::Flavor::Pandoc,
        dialect: crate::Dialect::for_flavor(crate::Flavor::Pandoc),
        extensions: crate::Extensions::for_flavor(crate::Flavor::Pandoc),
        ..crate::ParserOptions::default()
    };
    let doc = crate::parse(text, Some(opts));
    let first_text_block = doc
        .descendants()
        .find(|n| matches!(n.kind(), SyntaxKind::PARAGRAPH | SyntaxKind::PLAIN));
    match first_text_block {
        Some(block) => inlines_from(&block),
        None => Vec::new(),
    }
}
/// Slices `s` by character positions instead of byte offsets.
///
/// Returns the characters in `start_char..end_char`. Positions past the end of
/// the string are clamped to its end, and an inverted range gives `""`.
fn char_slice(s: &str, start_char: usize, end_char: usize) -> &str {
    // Byte offset of the n-th character, or the string length if there is none.
    let byte_offset = |n: usize| s.char_indices().nth(n).map_or(s.len(), |(b, _)| b);
    let from = byte_offset(start_char);
    let to = byte_offset(end_char);
    if from > to {
        ""
    } else {
        &s[from..to]
    }
}
/// Converts a list node into a bullet or ordered list block.
///
/// Looseness is determined once for the whole list and applied to every item.
fn list_block(node: &SyntaxNode) -> Block {
    let loose = is_loose_list(node);
    let mut items: Vec<Vec<Block>> = Vec::new();
    for child in node.children() {
        if child.kind() == SyntaxKind::LIST_ITEM {
            items.push(list_item_blocks(&child, loose));
        }
    }
    if !list_is_ordered(node) {
        return Block::BulletList(items);
    }
    let (start, style, delim) = ordered_list_attrs(node);
    Block::OrderedList(start, style, delim, items)
}
fn list_is_ordered(node: &SyntaxNode) -> bool {
let Some(item) = node.children().find(|c| c.kind() == SyntaxKind::LIST_ITEM) else {
return false;
};
let marker = item
.children_with_tokens()
.filter_map(|el| el.into_token())
.find(|t| t.kind() == SyntaxKind::LIST_MARKER)
.map(|t| t.text().to_string())
.unwrap_or_default();
let trimmed = marker.trim();
!trimmed.starts_with(['-', '+', '*'])
}
/// Returns `(start, style, delimiter)` for an ordered list.
///
/// The first item's marker is classified with `classify_ordered_marker`. For
/// `Example` lists the start number is replaced by the one recorded in
/// `REFS_CTX` for this list's start offset, when such an entry exists.
fn ordered_list_attrs(node: &SyntaxNode) -> (usize, &'static str, &'static str) {
    let marker = node
        .children()
        .find(|child| child.kind() == SyntaxKind::LIST_ITEM)
        .and_then(|item| {
            item.children_with_tokens()
                .filter_map(|element| element.into_token())
                .find(|token| token.kind() == SyntaxKind::LIST_MARKER)
        })
        .map(|token| token.text().to_string())
        .unwrap_or_default();

    let (parsed_start, style, delim) = classify_ordered_marker(marker.trim());
    if style != "Example" {
        return (parsed_start, style, delim);
    }

    let offset: u32 = node.text_range().start().into();
    let recorded = REFS_CTX.with(|ctx| {
        ctx.borrow()
            .example_list_start_by_offset
            .get(&offset)
            .copied()
    });
    (recorded.unwrap_or(parsed_start), style, delim)
}
fn classify_ordered_marker(trimmed: &str) -> (usize, &'static str, &'static str) {
let (body, delim) =
if let Some(inner) = trimmed.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
(inner, "TwoParens")
} else if let Some(inner) = trimmed.strip_suffix(')') {
(inner, "OneParen")
} else if let Some(inner) = trimmed.strip_suffix('.') {
(inner, "Period")
} else {
(trimmed, "DefaultDelim")
};
if !body.is_empty() && body.chars().all(|c| c.is_ascii_digit()) {
let start: usize = body.parse().unwrap_or(1);
return (start, "Decimal", delim);
}
if body == "#" {
return (1, "DefaultStyle", "DefaultDelim");
}
if let Some(rest) = body.strip_prefix('@')
&& rest
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
{
return (1, "Example", delim);
}
if body == "i" {
return (1, "LowerRoman", delim);
}
if body == "I" {
return (1, "UpperRoman", delim);
}
if body.len() == 1
&& let Some(c) = body.chars().next()
{
if c.is_ascii_lowercase() {
return ((c as u8 - b'a') as usize + 1, "LowerAlpha", delim);
}
if c.is_ascii_uppercase() {
return ((c as u8 - b'A') as usize + 1, "UpperAlpha", delim);
}
}
if body
.chars()
.all(|c| matches!(c, 'i' | 'v' | 'x' | 'l' | 'c' | 'd' | 'm'))
&& let Some(n) = roman_to_int(body, false)
{
return (n, "LowerRoman", delim);
}
if body
.chars()
.all(|c| matches!(c, 'I' | 'V' | 'X' | 'L' | 'C' | 'D' | 'M'))
&& let Some(n) = roman_to_int(body, true)
{
return (n, "UpperRoman", delim);
}
(1, "Decimal", delim)
}
/// Converts a roman numeral to its integer value.
///
/// With `upper` set, only upper-case digits are accepted; otherwise each
/// character is upper-cased before lookup. Returns `None` for an empty string
/// or when any character is not a roman digit. A digit smaller than its
/// successor is read together with it as a subtractive pair; the numeral is
/// not otherwise checked for canonical form.
fn roman_to_int(s: &str, upper: bool) -> Option<usize> {
    let digits: Vec<usize> = s
        .chars()
        .map(|c| {
            let c = if upper { c } else { c.to_ascii_uppercase() };
            match c {
                'I' => Some(1),
                'V' => Some(5),
                'X' => Some(10),
                'L' => Some(50),
                'C' => Some(100),
                'D' => Some(500),
                'M' => Some(1000),
                _ => None,
            }
        })
        .collect::<Option<_>>()?;
    if digits.is_empty() {
        return None;
    }

    let mut sum = 0usize;
    let mut pos = 0;
    while let Some(&current) = digits.get(pos) {
        match digits.get(pos + 1) {
            // Subtractive pair such as IV or XC: consume both digits.
            Some(&following) if current < following => {
                sum += following - current;
                pos += 2;
            }
            _ => {
                sum += current;
                pos += 1;
            }
        }
    }
    Some(sum)
}
/// Converts the children of a list item into blocks.
///
/// Non-empty `PLAIN` children become `Para` in loose lists and `Plain`
/// otherwise; the first of them is prefixed with the task-checkbox glyph and a
/// space when the item has a checkbox. Code blocks are rendered with the
/// item's content offset stripped; everything else goes through
/// `collect_block`.
fn list_item_blocks(item: &SyntaxNode, loose: bool) -> Vec<Block> {
    let content_offset = list_item_content_offset(item);
    // Taken (and thereby cleared) by the first PLAIN child that has content.
    let mut pending_checkbox = task_checkbox_for_item(item);
    let mut blocks = Vec::new();

    for child in item.children() {
        let kind = child.kind();
        if kind == SyntaxKind::CODE_BLOCK {
            blocks.push(indented_code_block_with_extra_strip(&child, content_offset));
        } else if kind == SyntaxKind::PLAIN {
            let mut inlines = coalesce_inlines(inlines_from(&child));
            if inlines.is_empty() {
                continue;
            }
            if let Some(glyph) = pending_checkbox.take() {
                inlines.insert(0, Inline::Str(glyph.to_string()));
                inlines.insert(1, Inline::Space);
            }
            blocks.push(if loose {
                Block::Para(inlines)
            } else {
                Block::Plain(inlines)
            });
        } else {
            collect_block(&child, &mut blocks);
        }
    }
    blocks
}
fn task_checkbox_for_item(item: &SyntaxNode) -> Option<&'static str> {
item.children_with_tokens()
.filter_map(|el| el.into_token())
.find(|t| t.kind() == SyntaxKind::TASK_CHECKBOX)
.map(|t| {
let text = t.text();
if text.contains('x') || text.contains('X') {
"\u{2612}"
} else {
"\u{2610}"
}
})
}
/// Computes the offset at which a list item's content starts.
///
/// The result is the sum of the whitespace preceding the item (see
/// `parent_list_leading_ws`), any whitespace tokens before the marker, the
/// marker itself, and the first whitespace token after the marker. Widths are
/// counted in characters. The scan stops at the first element that follows the
/// marker.
fn list_item_content_offset(item: &SyntaxNode) -> usize {
    let mut offset = parent_list_leading_ws(item);
    let mut marker_seen = false;
    for element in item.children_with_tokens() {
        let token = match element {
            NodeOrToken::Token(token) => token,
            NodeOrToken::Node(_) => {
                if marker_seen {
                    return offset;
                }
                continue;
            }
        };
        let width = || token.text().chars().count();
        match token.kind() {
            SyntaxKind::LIST_MARKER => {
                offset += width();
                marker_seen = true;
            }
            // Whitespace right after the marker is the last part counted.
            SyntaxKind::WHITESPACE if marker_seen => return offset + width(),
            SyntaxKind::WHITESPACE => offset += width(),
            _ if marker_seen => return offset,
            _ => {}
        }
    }
    offset
}
/// Returns the character width of the whitespace token immediately preceding
/// `item`, or 0 when the previous sibling is not a whitespace token.
fn parent_list_leading_ws(item: &SyntaxNode) -> usize {
    item.prev_sibling_or_token()
        .and_then(|element| element.into_token())
        .filter(|token| token.kind() == SyntaxKind::WHITESPACE)
        .map_or(0, |token| token.text().chars().count())
}
/// Determines whether a list is loose.
///
/// A list is loose when a blank line that comes after some item is directly
/// followed by another item, or when any item contains a `PARAGRAPH` child or
/// has a blank line between two of its blocks.
fn is_loose_list(node: &SyntaxNode) -> bool {
    let mut seen_item = false;
    for child in node.children() {
        match child.kind() {
            SyntaxKind::LIST_ITEM => seen_item = true,
            SyntaxKind::BLANK_LINE if seen_item => {
                let item_follows = child
                    .next_sibling()
                    .is_some_and(|next| next.kind() == SyntaxKind::LIST_ITEM);
                if item_follows {
                    return true;
                }
            }
            _ => {}
        }
    }

    node.children()
        .filter(|child| child.kind() == SyntaxKind::LIST_ITEM)
        .any(|item| {
            item.children()
                .any(|part| part.kind() == SyntaxKind::PARAGRAPH)
                || has_internal_blank_between_blocks(&item)
        })
}
/// Reports whether a blank line separates two blocks inside a list item.
///
/// Empty `PLAIN` children are ignored. Blank lines before the first block do
/// not count, and neither does a blank line with no block after it.
fn has_internal_blank_between_blocks(item: &SyntaxNode) -> bool {
    let mut seen_block = false;
    let mut blank_after_block = false;
    for child in item.children() {
        let kind = child.kind();
        if kind == SyntaxKind::BLANK_LINE {
            blank_after_block = blank_after_block || seen_block;
            continue;
        }
        if kind == SyntaxKind::PLAIN && child_is_empty_plain(&child) {
            continue;
        }
        if blank_after_block {
            return true;
        }
        seen_block = true;
    }
    false
}
/// True when `node` has no child nodes and only newline/whitespace tokens.
fn child_is_empty_plain(node: &SyntaxNode) -> bool {
    node.children_with_tokens().all(|element| match element {
        NodeOrToken::Token(token) => {
            matches!(token.kind(), SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE)
        }
        NodeOrToken::Node(_) => false,
    })
}
/// Converts the direct children of `parent` into inlines.
///
/// Tokens go through `push_token_inline`. LaTeX commands and citations get
/// access to the element iterator so they can absorb elements that follow
/// them; all other nodes go through `push_inline_node`. Trailing soft breaks
/// are removed from the result.
fn inlines_from(parent: &SyntaxNode) -> Vec<Inline> {
    let mut inlines = Vec::new();
    let mut elements = parent.children_with_tokens().peekable();
    while let Some(element) = elements.next() {
        let child = match element {
            NodeOrToken::Token(token) => {
                push_token_inline(&token, &mut inlines);
                continue;
            }
            NodeOrToken::Node(child) => child,
        };
        match child.kind() {
            SyntaxKind::LATEX_COMMAND => {
                emit_latex_command_with_absorb(&child, &mut elements, &mut inlines);
            }
            SyntaxKind::CITATION => {
                emit_citation_with_absorb(&child, &mut elements, &mut inlines);
            }
            _ => push_inline_node(&child, &mut inlines),
        }
    }
    // Keep everything up to and including the last inline that is not a soft break.
    let keep = inlines
        .iter()
        .rposition(|inline| !matches!(inline, Inline::SoftBreak))
        .map_or(0, |last| last + 1);
    inlines.truncate(keep);
    inlines
}
/// Renders a citation, absorbing a following bracketed locator if present.
///
/// Bracketed citations (those containing a `LINK_START` token) are rendered as
/// they are. A bare citation followed by whitespace and then a destination-less
/// `LINK` or `UNRESOLVED_REFERENCE` node takes that node's link text as its
/// locator; the whitespace token and the node are consumed from `iter` so they
/// are not emitted a second time.
///
/// The gap between citation and locator must be whitespace only. A `TEXT`
/// token that merely starts with a space but also carries words is left alone,
/// because absorbing it would drop those words from the output.
///
/// `iter` is expected to be positioned directly after `node`.
fn emit_citation_with_absorb<I>(
    node: &SyntaxNode,
    iter: &mut std::iter::Peekable<I>,
    out: &mut Vec<Inline>,
) where
    I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
{
    let bracketed = node
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::LINK_START);
    if bracketed {
        render_citation_inline(node, out, None);
        return;
    }

    let locator = node.next_sibling_or_token().and_then(|gap_el| {
        let gap = gap_el.as_token().cloned()?;
        let gap_text = gap.text();
        let whitespace_gap = gap.kind() == SyntaxKind::TEXT
            && gap_text.starts_with(' ')
            && gap_text.trim().is_empty();
        if !whitespace_gap {
            return None;
        }
        let link = gap.next_sibling_or_token()?.as_node().cloned()?;
        if link.kind() != SyntaxKind::LINK && link.kind() != SyntaxKind::UNRESOLVED_REFERENCE {
            return None;
        }
        // A link with a destination is a real link, not a locator.
        let has_dest = link
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|tok| tok.kind() == SyntaxKind::LINK_DEST_START);
        if has_dest {
            return None;
        }
        let link_text = link
            .children()
            .find(|c| c.kind() == SyntaxKind::LINK_TEXT)
            .map(|tt| tt.text().to_string())
            .unwrap_or_default();
        Some(link_text)
    });

    match locator {
        Some(locator_text) => {
            // Skip the whitespace token and the locator node.
            iter.next();
            iter.next();
            render_citation_inline(node, out, Some(&locator_text));
        }
        None => render_citation_inline(node, out, None),
    }
}
/// Emits a LaTeX command as a `tex` raw inline, absorbing trailing blanks.
///
/// When the command text ends in an ASCII letter and the next element is a
/// `TEXT` token starting with spaces or tabs, that leading whitespace is
/// appended to the raw inline, the token is consumed from `iter`, and the rest
/// of its text (if any) is emitted through `push_text`.
fn emit_latex_command_with_absorb<I>(
    node: &SyntaxNode,
    iter: &mut std::iter::Peekable<I>,
    out: &mut Vec<Inline>,
) where
    I: Iterator<Item = rowan::SyntaxElement<crate::syntax::PanacheLanguage>>,
{
    let mut raw = node.text().to_string();
    let may_absorb = raw.ends_with(|c: char| c.is_ascii_alphabetic());

    let following_text = if may_absorb {
        match iter.peek() {
            Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TEXT => {
                Some(token.text().to_string())
            }
            _ => None,
        }
    } else {
        None
    };

    if let Some(text) = following_text {
        let blank_len = text
            .bytes()
            .take_while(|b| matches!(b, b' ' | b'\t'))
            .count();
        if blank_len > 0 {
            let (blanks, rest) = text.split_at(blank_len);
            raw.push_str(blanks);
            out.push(Inline::RawInline("tex".to_string(), raw));
            iter.next();
            if !rest.is_empty() {
                push_text(rest, out);
            }
            return;
        }
    }
    out.push(Inline::RawInline("tex".to_string(), raw));
}
fn push_inline_node(node: &SyntaxNode, out: &mut Vec<Inline>) {
match node.kind() {
SyntaxKind::LINK => render_link_inline(node, out),
SyntaxKind::IMAGE_LINK => render_image_inline(node, out),
SyntaxKind::CITATION => render_citation_inline(node, out, None),
SyntaxKind::UNRESOLVED_REFERENCE => render_unresolved_reference_inline(node, out),
_ => out.push(inline_from_node(node)),
}
}
fn render_unresolved_reference_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
let is_image = node
.children()
.any(|c| c.kind() == SyntaxKind::IMAGE_LINK_START);
let text_node = if is_image {
node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT)
} else {
node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT)
};
let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
let text_label = text_node
.as_ref()
.map(|n| n.text().to_string())
.unwrap_or_default();
let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
Some(rn) => {
let inner = rn.text().to_string();
if inner.is_empty() {
(text_label.clone(), true, String::new())
} else {
(inner.clone(), true, inner)
}
}
None => (text_label.clone(), false, String::new()),
};
if !is_image && let Some(id) = lookup_heading_id(&label) {
let url = format!("#{id}");
let resolved_text_inlines = text_node
.as_ref()
.map(|n| coalesce_inlines(inlines_from(n)))
.unwrap_or_default();
out.push(Inline::Link(
extract_attr_from_node(node),
resolved_text_inlines,
url,
String::new(),
));
return;
}
let unresolved_text_inlines = text_node
.as_ref()
.map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
.unwrap_or_default();
let opener = if is_image { "![" } else { "[" };
out.push(Inline::Str(opener.to_string()));
out.extend(unresolved_text_inlines);
let suffix = if has_second_brackets {
format!("][{second_inner}]")
} else {
"]".to_string()
};
out.push(Inline::Str(suffix));
}
/// Renders a citation node as `Inline::Cite`, or as a number for example
/// references.
///
/// If the first citation key is a known example label in `REFS_CTX`, only its
/// number is emitted as a `Str`. Otherwise the node's tokens are folded into
/// one `CitationBuilder` per cited key: content seen before a marker becomes
/// that citation's prefix, content after the key its suffix.
/// `extra_suffix_text`, when given, is appended to the last citation's suffix
/// and to the literal fallback text as ` [extra]`.
///
/// The note number is looked up in `REFS_CTX` by the node's start offset and
/// defaults to 1.
fn render_citation_inline(
    node: &SyntaxNode,
    out: &mut Vec<Inline>,
    extra_suffix_text: Option<&str>,
) {
    let first_key = node
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .find(|t| t.kind() == SyntaxKind::CITATION_KEY)
        .map(|t| t.text().to_string())
        .unwrap_or_default();
    let example_resolution =
        REFS_CTX.with(|c| c.borrow().example_label_to_num.get(&first_key).copied());
    if let Some(n) = example_resolution {
        out.push(Inline::Str(n.to_string()));
        return;
    }

    let bracketed = node
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::LINK_START);

    let mut builders: Vec<CitationBuilder> = Vec::new();
    let mut current: Option<CitationBuilder> = None;
    let mut pending_prefix = String::new();
    for el in node.children_with_tokens() {
        let token = match el {
            NodeOrToken::Token(t) => t,
            _ => continue,
        };
        match token.kind() {
            SyntaxKind::LINK_START | SyntaxKind::LINK_DEST => {}
            SyntaxKind::CITATION_BRACE_OPEN | SyntaxKind::CITATION_BRACE_CLOSE => {}
            SyntaxKind::CITATION_MARKER => {
                // A new marker closes the citation in progress.
                if let Some(c) = current.take() {
                    builders.push(c);
                }
                let mode = if token.text() == "-@" {
                    CitationMode::SuppressAuthor
                } else if bracketed {
                    CitationMode::NormalCitation
                } else {
                    CitationMode::AuthorInText
                };
                current = Some(CitationBuilder::new(
                    std::mem::take(&mut pending_prefix),
                    mode,
                ));
            }
            SyntaxKind::CITATION_KEY => {
                if let Some(c) = &mut current {
                    c.id.push_str(token.text());
                }
            }
            SyntaxKind::CITATION_CONTENT => {
                // Content before any marker is held back as the prefix of the
                // next citation; content after a marker is that citation's
                // suffix.
                if let Some(c) = &mut current {
                    c.suffix_raw.push_str(token.text());
                } else {
                    pending_prefix.push_str(token.text());
                }
            }
            SyntaxKind::CITATION_SEPARATOR => {
                if let Some(c) = current.take() {
                    builders.push(c);
                }
            }
            _ => {}
        }
    }
    if let Some(c) = current.take() {
        builders.push(c);
    }

    if let Some(extra) = extra_suffix_text {
        if let Some(last) = builders.last_mut() {
            if !last.suffix_raw.is_empty() && !extra.starts_with(' ') {
                last.suffix_raw.push(' ');
            }
            last.suffix_raw.push_str(extra);
        }
    }

    let note_offset: u32 = node.text_range().start().into();
    let note_num = REFS_CTX
        .with(|c| {
            c.borrow()
                .cite_note_num_by_offset
                .get(&note_offset)
                .copied()
        })
        .unwrap_or(1);
    let projected: Vec<Citation> = builders
        .into_iter()
        .map(|b| b.into_citation(note_num))
        .collect();

    // Literal fallback text carried in the Cite alongside the citations.
    let mut literal = node.text().to_string();
    if let Some(extra) = extra_suffix_text {
        literal.push(' ');
        literal.push('[');
        literal.push_str(extra);
        literal.push(']');
    }
    let text_inlines = literal_inlines(&literal);
    out.push(Inline::Cite(projected, text_inlines));
}
/// Raw pieces of one citation, accumulated by `render_citation_inline` while
/// it scans a citation node's tokens and later turned into a `Citation` by
/// `into_citation`.
struct CitationBuilder {
/// Citation key; `CITATION_KEY` token text is appended here.
id: String,
/// Unparsed text that preceded the citation marker.
prefix_raw: String,
/// Unparsed text that followed the key (`CITATION_CONTENT` tokens, plus any
/// extra suffix text appended by the caller).
suffix_raw: String,
/// Citation mode chosen when the marker was seen.
mode: CitationMode,
}
impl CitationBuilder {
    /// Starts a builder with the given raw prefix and mode; the key and the
    /// raw suffix start out empty.
    fn new(prefix_raw: String, mode: CitationMode) -> Self {
        let (id, suffix_raw) = (String::new(), String::new());
        Self {
            id,
            prefix_raw,
            suffix_raw,
            mode,
        }
    }

    /// Consumes the builder and produces a `Citation`.
    ///
    /// The raw prefix (with trailing whitespace removed) and the raw suffix
    /// are parsed into inlines by `parse_cite_affix_inlines`; `hash` is
    /// always set to 0.
    fn into_citation(self, note_num: i64) -> Citation {
        let Self {
            id,
            prefix_raw,
            suffix_raw,
            mode,
        } = self;
        Citation {
            id,
            prefix: parse_cite_affix_inlines(prefix_raw.trim_end(), true),
            suffix: parse_cite_affix_inlines(&suffix_raw, false),
            mode,
            note_num,
            hash: 0,
        }
    }
}
/// Parses the raw text of a citation prefix or suffix into inlines.
///
/// For a prefix (`is_prefix == true`) leading whitespace is discarded. For a
/// suffix, leading spaces/tabs are represented by a single leading
/// `Inline::Space`. The remaining text is parsed by
/// `parse_cell_text_inlines` behind a throw-away `"Z "` sentinel word, which
/// is removed again (together with the space after it) from the result.
fn parse_cite_affix_inlines(raw: &str, is_prefix: bool) -> Vec<Inline> {
    let body = if is_prefix { raw.trim_start() } else { raw };
    if body.is_empty() {
        return Vec::new();
    }
    let blanks = [' ', '\t'];
    let keeps_leading_space = !is_prefix && body.starts_with(blanks);
    let content = body.trim_start_matches(blanks);
    if content.is_empty() {
        // Nothing but blanks: a suffix still contributes its single space.
        return if keeps_leading_space {
            vec![Inline::Space]
        } else {
            Vec::new()
        };
    }
    let sentinel_text = format!("Z {content}");
    let mut parsed = coalesce_inlines(parse_cell_text_inlines(&sentinel_text));
    let sentinel_present = matches!(parsed.first(), Some(Inline::Str(s)) if s == "Z");
    if sentinel_present {
        // Drop the sentinel, and the space that separated it from the text.
        let drop_count = if matches!(parsed.get(1), Some(Inline::Space)) {
            2
        } else {
            1
        };
        parsed.drain(..drop_count);
    }
    if keeps_leading_space {
        parsed.insert(0, Inline::Space);
    }
    parsed
}
/// Splits literal text into `Str`, `Space` and `SoftBreak` inlines.
///
/// Spaces and tabs end the current word and yield one `Space` unless the
/// previous inline is already a `Space` or `SoftBreak`. A newline ends the
/// current word, replaces a directly preceding `Space`, and yields a
/// `SoftBreak`.
fn literal_inlines(text: &str) -> Vec<Inline> {
    /// Moves a non-empty pending word into `sink` as an `Inline::Str`.
    fn flush_word(word: &mut String, sink: &mut Vec<Inline>) {
        if !word.is_empty() {
            sink.push(Inline::Str(std::mem::take(word)));
        }
    }

    let mut result: Vec<Inline> = Vec::new();
    let mut word = String::new();
    for ch in text.chars() {
        if ch == ' ' || ch == '\t' {
            flush_word(&mut word, &mut result);
            let already_separated =
                matches!(result.last(), Some(Inline::Space | Inline::SoftBreak));
            if !already_separated {
                result.push(Inline::Space);
            }
        } else if ch == '\n' {
            flush_word(&mut word, &mut result);
            if matches!(result.last(), Some(Inline::Space)) {
                result.pop();
            }
            result.push(Inline::SoftBreak);
        } else {
            word.push(ch);
        }
    }
    flush_word(&mut word, &mut result);
    result
}
/// Appends the inline(s) corresponding to a single token to `out`.
///
/// `TEXT` is split by `push_text`; whitespace, newline and hard-break tokens
/// map to `Space`, `SoftBreak` and `LineBreak`; an escaped character yields
/// its text without the first character; a non-breaking space yields
/// U+00A0. Every other token kind is ignored.
fn push_token_inline(
    t: &rowan::SyntaxToken<crate::syntax::PanacheLanguage>,
    out: &mut Vec<Inline>,
) {
    let kind = t.kind();
    if kind == SyntaxKind::TEXT {
        push_text(t.text(), out);
        return;
    }
    let produced = match kind {
        SyntaxKind::WHITESPACE => Some(Inline::Space),
        SyntaxKind::NEWLINE => Some(Inline::SoftBreak),
        SyntaxKind::HARD_LINE_BREAK => Some(Inline::LineBreak),
        SyntaxKind::ESCAPED_CHAR => {
            // Skip the first character of the token text (the escape marker).
            let unescaped: String = t.text().chars().skip(1).collect();
            Some(Inline::Str(unescaped))
        }
        SyntaxKind::NONBREAKING_SPACE => Some(Inline::Str("\u{a0}".to_string())),
        _ => None,
    };
    out.extend(produced);
}
/// Splits `text` into words and separators and appends them to `out`.
///
/// Every space or tab yields its own `Inline::Space` and every newline its
/// own `Inline::SoftBreak` (no collapsing happens here); the characters in
/// between are emitted as `Inline::Str`.
fn push_text(text: &str, out: &mut Vec<Inline>) {
    let mut word = String::new();
    for ch in text.chars() {
        let separator = match ch {
            ' ' | '\t' => Inline::Space,
            '\n' => Inline::SoftBreak,
            _ => {
                word.push(ch);
                continue;
            }
        };
        if !word.is_empty() {
            out.push(Inline::Str(std::mem::take(&mut word)));
        }
        out.push(separator);
    }
    if !word.is_empty() {
        out.push(Inline::Str(word));
    }
}
fn inline_from_node(node: &SyntaxNode) -> Inline {
match node.kind() {
SyntaxKind::EMPHASIS => {
Inline::Emph(coalesce_inlines_keep_edges(inlines_from_marked(node)))
}
SyntaxKind::STRONG => {
Inline::Strong(coalesce_inlines_keep_edges(inlines_from_marked(node)))
}
SyntaxKind::STRIKEOUT => {
Inline::Strikeout(coalesce_inlines_keep_edges(inlines_from_marked(node)))
}
SyntaxKind::SUPERSCRIPT => {
Inline::Superscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
}
SyntaxKind::SUBSCRIPT => {
Inline::Subscript(coalesce_inlines_keep_edges(inlines_from_marked(node)))
}
SyntaxKind::INLINE_CODE => {
let content: String = node
.children_with_tokens()
.filter_map(|el| el.into_token())
.filter(|t| t.kind() == SyntaxKind::INLINE_CODE_CONTENT)
.map(|t| t.text().to_string())
.collect();
Inline::Code(
extract_attr_from_node(node),
strip_inline_code_padding(&content),
)
}
SyntaxKind::LINK | SyntaxKind::IMAGE_LINK | SyntaxKind::UNRESOLVED_REFERENCE => {
Inline::Unsupported(format!("{:?}", node.kind()))
}
SyntaxKind::AUTO_LINK => autolink_inline(node),
SyntaxKind::INLINE_MATH => math_inline(node, "InlineMath"),
SyntaxKind::DISPLAY_MATH => math_inline(node, "DisplayMath"),
SyntaxKind::LATEX_COMMAND => latex_command_inline(node),
SyntaxKind::BRACKETED_SPAN => bracketed_span_inline(node),
SyntaxKind::INLINE_HTML => Inline::RawInline("html".to_string(), node.text().to_string()),
SyntaxKind::FOOTNOTE_REFERENCE => footnote_reference_inline(node),
SyntaxKind::INLINE_FOOTNOTE => inline_footnote_inline(node),
other => Inline::Unsupported(format!("{other:?}")),
}
}
fn inlines_from_marked(parent: &SyntaxNode) -> Vec<Inline> {
let mut out = Vec::new();
let mut iter = parent.children_with_tokens().peekable();
while let Some(el) = iter.next() {
match el {
NodeOrToken::Token(t) => match t.kind() {
SyntaxKind::EMPHASIS_MARKER
| SyntaxKind::STRONG_MARKER
| SyntaxKind::STRIKEOUT_MARKER
| SyntaxKind::SUPERSCRIPT_MARKER
| SyntaxKind::SUBSCRIPT_MARKER
| SyntaxKind::MARK_MARKER => {}
_ => push_token_inline(&t, &mut out),
},
NodeOrToken::Node(n) => match n.kind() {
SyntaxKind::EMPHASIS_MARKER
| SyntaxKind::STRONG_MARKER
| SyntaxKind::STRIKEOUT_MARKER
| SyntaxKind::SUPERSCRIPT_MARKER
| SyntaxKind::SUBSCRIPT_MARKER
| SyntaxKind::MARK_MARKER => {}
_ if n.kind() == SyntaxKind::LATEX_COMMAND => {
emit_latex_command_with_absorb(&n, &mut iter, &mut out);
}
_ => push_inline_node(&n, &mut out),
},
}
}
out
}
/// Renders a link node and appends the result to `out`.
///
/// * With a `LINK_DEST_START` token the link is inline: URL and title come
///   from `parse_link_dest` on the `LINK_DEST` child.
/// * Otherwise the label (the `LINK_REF` text, or the link text when the
///   reference is absent or empty) is looked up first with `lookup_ref`,
///   then with `lookup_heading_id` (yielding a `#id` URL and empty title).
/// * If neither lookup succeeds, the source is re-emitted as plain inlines:
///   `[`, the link text with edge whitespace kept, then `]` or `][ref]`.
fn render_link_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
    let text_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_TEXT);
    let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
    let is_inline_link = node.children_with_tokens().any(
        |el| matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::LINK_DEST_START),
    );
    let trimmed_text = |source: &Option<SyntaxNode>| -> Vec<Inline> {
        source
            .as_ref()
            .map(|n| coalesce_inlines(inlines_from(n)))
            .unwrap_or_default()
    };

    if is_inline_link {
        let text = trimmed_text(&text_node);
        let (url, title) = dest_node
            .as_ref()
            .map(parse_link_dest)
            .unwrap_or((String::new(), String::new()));
        out.push(Inline::Link(extract_attr_from_node(node), text, url, title));
        return;
    }

    let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
    let resolved_text_inlines = trimmed_text(&text_node);
    let text_label = text_node
        .as_ref()
        .map(|n| n.text().to_string())
        .unwrap_or_default();
    // `second_inner` is the text inside a second pair of brackets; it is
    // empty both when there is no second pair and when the pair is empty.
    let ref_inner = ref_node.as_ref().map(|rn| rn.text().to_string());
    let has_second_brackets = ref_inner.is_some();
    let second_inner = ref_inner.unwrap_or_default();
    let label = if second_inner.is_empty() {
        text_label
    } else {
        second_inner.clone()
    };

    // Explicit reference definitions win over heading ids.
    let target = lookup_ref(&label)
        .or_else(|| lookup_heading_id(&label).map(|id| (format!("#{id}"), String::new())));
    if let Some((url, title)) = target {
        out.push(Inline::Link(
            extract_attr_from_node(node),
            resolved_text_inlines,
            url,
            title,
        ));
        return;
    }

    let unresolved_text_inlines = text_node
        .as_ref()
        .map(|n| coalesce_inlines_keep_edges(inlines_from(n)))
        .unwrap_or_default();
    out.push(Inline::Str("[".to_string()));
    out.extend(unresolved_text_inlines);
    let suffix = match has_second_brackets {
        true => format!("][{second_inner}]"),
        false => "]".to_string(),
    };
    out.push(Inline::Str(suffix));
}
fn render_image_inline(node: &SyntaxNode, out: &mut Vec<Inline>) {
let alt_node = node.children().find(|c| c.kind() == SyntaxKind::IMAGE_ALT);
let dest_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_DEST);
let has_dest_paren = node.children_with_tokens().any(|el| {
matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::IMAGE_DEST_START
|| t.kind() == SyntaxKind::LINK_DEST_START)
});
if has_dest_paren {
let alt = alt_node
.as_ref()
.map(|n| coalesce_inlines(inlines_from(n)))
.unwrap_or_default();
let (url, title) = dest_node
.as_ref()
.map(parse_link_dest)
.unwrap_or((String::new(), String::new()));
out.push(Inline::Image(extract_attr_from_node(node), alt, url, title));
return;
}
let ref_node = node.children().find(|c| c.kind() == SyntaxKind::LINK_REF);
let alt_inlines = alt_node
.as_ref()
.map(|n| coalesce_inlines(inlines_from(n)))
.unwrap_or_default();
let alt_label = alt_node
.as_ref()
.map(|n| n.text().to_string())
.unwrap_or_default();
let (label, has_second_brackets, second_inner) = match ref_node.as_ref() {
Some(rn) => {
let inner = rn.text().to_string();
if inner.is_empty() {
(alt_label.clone(), true, String::new())
} else {
(inner.clone(), true, inner)
}
}
None => (alt_label.clone(), false, String::new()),
};
if let Some((url, title)) = lookup_ref(&label) {
out.push(Inline::Image(
extract_attr_from_node(node),
alt_inlines,
url,
title,
));
return;
}
if let Some(id) = lookup_heading_id(&label) {
let url = format!("#{id}");
out.push(Inline::Image(
extract_attr_from_node(node),
alt_inlines,
url,
String::new(),
));
return;
}
out.push(Inline::Str("![".to_string()));
out.extend(alt_inlines);
let suffix = if has_second_brackets {
format!("][{second_inner}]")
} else {
"]".to_string()
};
out.push(Inline::Str(suffix));
}
/// Normalises inline-code content: every newline becomes a space, then
/// leading and trailing whitespace is removed.
fn strip_inline_code_padding(s: &str) -> String {
    let single_line = s.replace('\n', " ");
    single_line.trim().to_owned()
}
/// Builds an `Inline::Math` of the given `kind` from a math node.
///
/// The content is the concatenated text of all direct child tokens except
/// the inline/display math markers.
fn math_inline(node: &SyntaxNode, kind: &'static str) -> Inline {
    let content: String = node
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .filter(|t| {
            !matches!(
                t.kind(),
                SyntaxKind::INLINE_MATH_MARKER | SyntaxKind::DISPLAY_MATH_MARKER
            )
        })
        .map(|t| t.text().to_string())
        .collect();
    Inline::Math(kind, content)
}
/// Converts an autolink node into an inline.
///
/// The target is the concatenated `TEXT` tokens of the node.
/// * A target containing `@` but neither `://` nor a `mailto:` prefix is
///   an e-mail address: a link with class `email` and a `mailto:` URL.
/// * A target whose scheme passes `is_known_uri_scheme` becomes a link with
///   class `uri`.
/// * Anything else is emitted as raw HTML using the node's full text.
fn autolink_inline(node: &SyntaxNode) -> Inline {
    let url: String = node
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .filter(|t| t.kind() == SyntaxKind::TEXT)
        .map(|t| t.text().to_string())
        .collect();
    let class_attr = |class: &str| Attr {
        id: String::new(),
        classes: vec![class.to_string()],
        kvs: Vec::new(),
    };

    let looks_like_email =
        url.contains('@') && !url.contains("://") && !url.starts_with("mailto:");
    if looks_like_email {
        let dest = format!("mailto:{url}");
        return Inline::Link(
            class_attr("email"),
            vec![Inline::Str(url)],
            dest,
            String::new(),
        );
    }

    if is_known_uri_scheme(&url) {
        let label = vec![Inline::Str(url.clone())];
        Inline::Link(class_attr("uri"), label, url, String::new())
    } else {
        Inline::RawInline("html".to_string(), node.text().to_string())
    }
}
/// Reports whether the part of `url` before its first `:` is, compared
/// ASCII-case-insensitively, one of `PANDOC_KNOWN_SCHEMES`.
///
/// Returns `false` when `url` contains no colon.
fn is_known_uri_scheme(url: &str) -> bool {
    match url.split_once(':') {
        Some((scheme, _)) => {
            let lowered = scheme.to_ascii_lowercase();
            PANDOC_KNOWN_SCHEMES
                .binary_search(&lowered.as_str())
                .is_ok()
        }
        None => false,
    }
}
/// Lowercase URI scheme names that `is_known_uri_scheme` accepts.
///
/// The entries are in ascending byte order. `is_known_uri_scheme` looks
/// schemes up with `binary_search`, so any new entry must be inserted at its
/// sorted position. (Presumably this mirrors pandoc's own scheme list — confirm
/// against upstream before editing.)
#[rustfmt::skip]
const PANDOC_KNOWN_SCHEMES: &[&str] = &[
"aaa", "aaas", "about", "acap", "acct", "acr",
"adiumxtra", "afp", "afs", "aim", "appdata", "apt",
"attachment", "aw", "barion", "beshare", "bitcoin", "blob",
"bolo", "browserext", "callto", "cap", "chrome", "chrome-extension",
"cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid",
"cvs", "data", "dav", "dict", "dis", "dlna-playcontainer",
"dlna-playsingle", "dns", "dntp", "doi", "dtn", "dvb",
"ed2k", "example", "facetime", "fax", "feed", "feedready",
"file", "filesystem", "finger", "fish", "ftp", "gemini",
"geo", "gg", "git", "gizmoproject", "go", "gopher",
"graph", "gtalk", "h323", "ham", "hcp", "http",
"https", "hxxp", "hxxps", "hydrazone", "iax", "icap",
"icon", "im", "imap", "info", "iotdisco", "ipn",
"ipp", "ipps", "irc", "irc6", "ircs", "iris",
"iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "isbn", "isostore",
"itms", "jabber", "jar", "javascript", "jms", "keyparc",
"lastfm", "ldap", "ldaps", "lvlt", "magnet", "mailserver",
"mailto", "maps", "market", "message", "mid", "mms",
"modem", "mongodb", "moz", "ms-access", "ms-browser-extension", "ms-drive-to",
"ms-enrollment", "ms-excel", "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath",
"ms-media-stream-id", "ms-officeapp", "ms-powerpoint", "ms-project", "ms-publisher", "ms-search-repair",
"ms-secondary-screen-controller", "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode", "ms-settings-bluetooth", "ms-settings-camera",
"ms-settings-cellular", "ms-settings-cloudstorage", "ms-settings-connectabledevices", "ms-settings-displays-topology", "ms-settings-emailandaccounts", "ms-settings-language",
"ms-settings-location", "ms-settings-lock", "ms-settings-nfctransactions", "ms-settings-notifications", "ms-settings-power", "ms-settings-privacy",
"ms-settings-proximity", "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace", "ms-spd", "ms-sttoverlay",
"ms-transit-to", "ms-virtualtouchpad", "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd",
"ms-word", "msnim", "msrp", "msrps", "mtqp", "mumble",
"mupdate", "mvn", "news", "nfs", "ni", "nih",
"nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd",
"opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform",
"pmid", "pop", "pres", "prospero", "proxy", "psyc",
"pwid", "qb", "query", "redis", "rediss", "reload",
"res", "resource", "rmi", "rsync", "rtmfp", "rtmp",
"rtsp", "rtsps", "rtspu", "secondlife", "service", "session",
"sftp", "sgn", "shttp", "sieve", "sip", "sips",
"skype", "smb", "sms", "smtp", "snews", "snmp",
"soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam",
"stun", "stuns", "submit", "svn", "tag", "teamspeak",
"tel", "teliaeid", "telnet", "tftp", "things", "thismessage",
"tip", "tn3270", "tool", "turn", "turns", "tv",
"udp", "unreal", "urn", "ut2004", "v-event", "vemmi",
"ventrilo", "videotex", "view-source", "vnc", "wais", "webcal",
"wpid", "ws", "wss", "wtai", "wyciwyg", "xcon",
"xcon-userid", "xfire", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri",
"ymsgr", "z39.50", "z39.50r", "z39.50s",
];
/// Resolves a footnote reference node.
///
/// * No label can be extracted → `Inline::Unsupported("FOOTNOTE_REFERENCE")`.
/// * The label has an entry in the thread-local `footnotes` map → an
///   `Inline::Note` holding a copy of the stored blocks.
/// * Otherwise → the node's source text as an `Inline::Str`.
fn footnote_reference_inline(node: &SyntaxNode) -> Inline {
    let Some(label) = footnote_label(node) else {
        return Inline::Unsupported("FOOTNOTE_REFERENCE".to_string());
    };
    let definition = REFS_CTX.with(|c| {
        c.borrow()
            .footnotes
            .get(&label)
            .map(|body| body.iter().map(clone_block).collect::<Vec<_>>())
    });
    definition.map_or_else(|| Inline::Str(node.text().to_string()), Inline::Note)
}
/// Converts an inline footnote node into an `Inline::Note`.
///
/// The note holds a single `Para` with the node's coalesced inlines, or no
/// blocks at all when there are no inlines.
fn inline_footnote_inline(node: &SyntaxNode) -> Inline {
    let body = coalesce_inlines(inlines_from(node));
    let blocks = if body.is_empty() {
        Vec::new()
    } else {
        vec![Block::Para(body)]
    };
    Inline::Note(blocks)
}
/// Splits a link-destination node into `(url, title)`.
///
/// The node text is trimmed first. A destination of the form `<url> …` uses
/// everything between `<` and the first `>` as the URL. Otherwise the URL
/// ends at the first run of spaces/tabs/newlines that is followed by `"`,
/// `'` or `(`; without such a run the whole text is the URL. The URL is
/// passed through `escape_link_dest` and the remainder through
/// `parse_dest_title`.
fn parse_link_dest(node: &SyntaxNode) -> (String, String) {
    let raw = node.text().to_string();
    let trimmed = raw.trim();

    let angle_form = trimmed
        .strip_prefix('<')
        .and_then(|rest| rest.split_once('>'));
    if let Some((url, tail)) = angle_form {
        let title = parse_dest_title(tail.trim());
        return (escape_link_dest(url), title);
    }

    let is_gap = |b: u8| matches!(b, b' ' | b'\t' | b'\n');
    let bytes = trimmed.as_bytes();
    let mut url_end = trimmed.len();
    let mut cursor = 0;
    // Visit each whitespace run; the first one followed by a title opener
    // marks the end of the URL.
    while let Some(offset) = bytes[cursor..].iter().position(|&b| is_gap(b)) {
        let gap_start = cursor + offset;
        let gap_len = bytes[gap_start..]
            .iter()
            .take_while(|&&b| is_gap(b))
            .count();
        let after_gap = gap_start + gap_len;
        if matches!(
            bytes.get(after_gap).copied(),
            Some(b'"' | b'\'' | b'(')
        ) {
            url_end = gap_start;
            break;
        }
        cursor = after_gap;
    }

    let (url_raw, remainder) = trimmed.split_at(url_end);
    let title = parse_dest_title(remainder.trim());
    (escape_link_dest(url_raw), title)
}
/// Percent-encodes the characters of a link destination that need escaping.
///
/// Whitespace characters and `< > | " { } [ ] ^ `` ` `` are replaced by the
/// `%XX` form (uppercase hex) of each of their UTF-8 bytes; every other
/// character is copied unchanged.
fn escape_link_dest(s: &str) -> String {
    const RESERVED: [char; 10] = ['<', '>', '|', '"', '{', '}', '[', ']', '^', '`'];
    let mut escaped = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        if !(ch.is_whitespace() || RESERVED.contains(&ch)) {
            escaped.push(ch);
            continue;
        }
        for byte in ch.encode_utf8(&mut utf8).bytes() {
            escaped.push_str(&format!("%{byte:02X}"));
        }
    }
    escaped
}
/// Extracts a link title from the text that follows a link URL.
///
/// The text must start with `"`, `'` or `(`; the title is everything between
/// that opener and the *last* matching closer (`"`, `'` or `)`) in the
/// remaining text. Returns an empty string when `s` is empty, starts with
/// any other character, or has no closer after the opener.
fn parse_dest_title(s: &str) -> String {
    let close = match s.as_bytes().first() {
        Some(b'"') => '"',
        Some(b'\'') => '\'',
        Some(b'(') => ')',
        _ => return String::new(),
    };
    // The opener is a single ASCII byte, so slicing at 1 is a char boundary.
    let body = &s[1..];
    body.rfind(close)
        .map(|end| body[..end].to_string())
        .unwrap_or_default()
}
/// Coalesces `input` via `coalesce_inlines_inner`, with leading and trailing
/// `Space`/`SoftBreak` inlines trimmed.
fn coalesce_inlines(input: Vec<Inline>) -> Vec<Inline> {
    let trim_edges = true;
    coalesce_inlines_inner(input, trim_edges)
}
/// Coalesces `input` via `coalesce_inlines_inner`, leaving leading and
/// trailing `Space`/`SoftBreak` inlines in place.
fn coalesce_inlines_keep_edges(input: Vec<Inline>) -> Vec<Inline> {
    let trim_edges = false;
    coalesce_inlines_inner(input, trim_edges)
}
/// Normalises a raw inline sequence.
///
/// 1. Adjacent `Str`s are merged; a `Space` directly after a `Space` or
///    `SoftBreak` is dropped; a `SoftBreak` replaces a directly preceding
///    `Space`.
/// 2. With `trim_edges`, leading and trailing `Space`/`SoftBreak` inlines
///    are removed.
/// 3. Every `Str` is passed through `smart_intraword_apostrophe` and then
///    `smart_dashes_and_ellipsis`.
/// 4. The sequence goes through `smart_quote_pairs` and finally
///    `apply_abbreviations`.
fn coalesce_inlines_inner(input: Vec<Inline>, trim_edges: bool) -> Vec<Inline> {
    let is_break = |item: &Inline| matches!(item, Inline::Space | Inline::SoftBreak);

    let mut merged: Vec<Inline> = Vec::with_capacity(input.len());
    for item in input {
        match item {
            Inline::Str(text) => match merged.last_mut() {
                Some(Inline::Str(tail)) => tail.push_str(&text),
                _ => merged.push(Inline::Str(text)),
            },
            Inline::Space => {
                if !merged.last().is_some_and(is_break) {
                    merged.push(Inline::Space);
                }
            }
            Inline::SoftBreak => {
                if matches!(merged.last(), Some(Inline::Space)) {
                    merged.pop();
                }
                merged.push(Inline::SoftBreak);
            }
            other => merged.push(other),
        }
    }

    if trim_edges {
        let leading = merged.iter().take_while(|&item| is_break(item)).count();
        merged.drain(..leading);
        while merged.last().is_some_and(is_break) {
            merged.pop();
        }
    }

    for item in merged.iter_mut() {
        if let Inline::Str(text) = item {
            let with_apostrophes = smart_intraword_apostrophe(text);
            *text = smart_dashes_and_ellipsis(&with_apostrophes);
        }
    }

    apply_abbreviations(smart_quote_pairs(merged))
}
/// Abbreviations, each ending in a period, that `matches_abbreviation_suffix`
/// tests against the end of a string. `apply_abbreviations` uses that test to
/// decide where a following space is replaced by a non-breaking space.
const PANDOC_ABBREVIATIONS: &[&str] = &[
"Apr.", "Aug.", "Bros.", "Capt.", "Co.", "Corp.", "Dec.", "Dr.", "Feb.", "Fr.", "Gen.", "Gov.",
"Hon.", "Inc.", "Jan.", "Jr.", "Jul.", "Jun.", "Ltd.", "M.A.", "M.D.", "Mar.", "Mr.", "Mrs.",
"Ms.", "No.", "Nov.", "Oct.", "Ph.D.", "Pres.", "Prof.", "Rep.", "Rev.", "Sen.", "Sep.",
"Sept.", "Sgt.", "Sr.", "St.", "aet.", "aetat.", "al.", "bk.", "c.", "cf.", "ch.", "chap.",
"chs.", "col.", "cp.", "d.", "e.g.", "ed.", "eds.", "esp.", "f.", "fasc.", "ff.", "fig.",
"fl.", "fol.", "fols.", "i.e.", "ill.", "incl.", "n.", "n.b.", "nn.", "p.", "pp.", "pt.",
"q.v.", "s.v.", "s.vv.", "saec.", "sec.", "univ.", "viz.", "vol.", "vs.",
];
/// Reports whether `s` ends with one of `PANDOC_ABBREVIATIONS` as a
/// stand-alone word.
///
/// The abbreviation counts when it is the whole string, or when the
/// character directly before it is neither alphanumeric nor a period.
fn matches_abbreviation_suffix(s: &str) -> bool {
    PANDOC_ABBREVIATIONS.iter().any(|&abbr| {
        s.strip_suffix(abbr)
            .is_some_and(|head| match head.chars().next_back() {
                None => true,
                Some(before) => !(before.is_alphanumeric() || before == '.'),
            })
    })
}
/// Glues abbreviations to the word that follows them.
///
/// A `Str` for which `matches_abbreviation_suffix` holds and that is
/// directly followed by a `Space` absorbs that space as U+00A0; if the next
/// inline is again a `Str`, its text is appended to the same `Str`. The
/// contents of `Quoted` inlines are processed recursively; everything else
/// is passed through unchanged.
fn apply_abbreviations(inlines: Vec<Inline>) -> Vec<Inline> {
    let mut result: Vec<Inline> = Vec::with_capacity(inlines.len());
    let mut pending = inlines
        .into_iter()
        .map(|item| match item {
            Inline::Quoted(kind, inner) => Inline::Quoted(kind, apply_abbreviations(inner)),
            other => other,
        })
        .peekable();

    while let Some(item) = pending.next() {
        match item {
            Inline::Str(word)
                if matches_abbreviation_suffix(&word)
                    && matches!(pending.peek(), Some(Inline::Space)) =>
            {
                // Swallow the space and replace it with a non-breaking one.
                pending.next();
                let mut word = word;
                word.push('\u{a0}');
                if let Some(Inline::Str(following)) =
                    pending.next_if(|next| matches!(next, Inline::Str(_)))
                {
                    word.push_str(&following);
                }
                result.push(Inline::Str(word));
            }
            other => result.push(other),
        }
    }
    result
}
/// Wraps matched straight-quote pairs in `Inline::Quoted`.
///
/// A `Str` that starts with `"` or `'` opens a quotation when the previously
/// emitted inline is a boundary (see `is_boundary`) and the quote "attaches"
/// to what follows. The closing position comes from `find_matching_close`;
/// the inlines in between, with the two quote characters stripped, become the
/// content of a `Quoted` of kind `"DoubleQuote"` or `"SingleQuote"`. All other
/// inlines are copied to the output unchanged.
fn smart_quote_pairs(inlines: Vec<Inline>) -> Vec<Inline> {
// True when an opening quote may follow `prev`: at the start of the
// sequence, after a space or break, or after a `Str` whose last character
// is not alphanumeric.
fn is_boundary(prev: Option<&Inline>) -> bool {
match prev {
None => true,
Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
Some(Inline::Str(s)) => s.chars().last().is_some_and(|c| !c.is_alphanumeric()),
_ => false,
}
}
let mut out: Vec<Inline> = Vec::with_capacity(inlines.len());
let n = inlines.len();
// consumed[k] marks inputs that have already been emitted, either directly
// or as part of an earlier quotation.
let mut consumed = vec![false; n];
for i in 0..n {
if consumed[i] {
continue;
}
// Only `Str` inlines can open a quotation; anything else is copied as is.
let Inline::Str(s) = &inlines[i] else {
out.push(clone_inline(&inlines[i]));
consumed[i] = true;
continue;
};
let first = s.chars().next();
let quote = match first {
Some('"') => Some('"'),
Some('\'') => Some('\''),
_ => None,
};
let prev_is_boundary = is_boundary(out.last());
let str_has_more = s.chars().count() > 1;
let next_char_is_word = s.chars().nth(1).is_some_and(|c| !c.is_whitespace());
let next_is_markup_atom = matches!(
inlines.get(i + 1),
Some(
Inline::Emph(_)
| Inline::Strong(_)
| Inline::Strikeout(_)
| Inline::Superscript(_)
| Inline::Subscript(_)
| Inline::Code(_, _)
)
);
// The quote attaches either to more text in the same `Str` (second
// character not whitespace) or, when the `Str` is the quote alone, to a
// directly following emphasis/strong/strikeout/super-/subscript/code inline.
let attaches =
(str_has_more && next_char_is_word) || (!str_has_more && next_is_markup_atom);
if let Some(q) = quote
&& prev_is_boundary
&& attaches
{
if let Some(close_idx) = find_matching_close(&inlines, i, q, &consumed) {
let kind = if q == '"' {
"DoubleQuote"
} else {
"SingleQuote"
};
let mut content: Vec<Inline> = Vec::new();
for j in i..=close_idx {
if consumed[j] {
continue;
}
let inline = &inlines[j];
if j == i && j == close_idx {
// Opening and closing quote live in the same `Str`: drop the first
// and last character.
if let Inline::Str(s) = inline {
let mut chars: Vec<char> = s.chars().collect();
if chars.len() >= 2 {
chars.remove(0);
chars.pop();
}
let stripped: String = chars.into_iter().collect();
if !stripped.is_empty() {
content.push(Inline::Str(stripped));
}
}
} else if j == i {
// Opening `Str`: drop the leading quote character.
if let Inline::Str(s) = inline {
let stripped: String = s.chars().skip(1).collect();
if !stripped.is_empty() {
content.push(Inline::Str(stripped));
}
}
} else if j == close_idx {
// Closing `Str`: drop the trailing quote character.
if let Inline::Str(s) = inline {
let mut stripped: String = s.chars().collect();
stripped.pop();
if !stripped.is_empty() {
content.push(Inline::Str(stripped));
}
}
} else {
content.push(clone_inline(inline));
}
consumed[j] = true;
}
out.push(Inline::Quoted(kind, content));
continue;
}
}
// Not an opening quote, or no matching close: emit the `Str` unchanged.
out.push(clone_inline(&inlines[i]));
consumed[i] = true;
}
out
}
/// Finds the index of the inline that closes a quotation opened at
/// `open_idx`.
///
/// A candidate is a `Str` ending in `quote` whose successor is a boundary:
/// end of input, a space or break, or a `Str` starting with a
/// non-alphanumeric character. The opening `Str` itself qualifies when it is
/// at least three characters long. Otherwise up to 32 following inlines are
/// examined; the search gives up with `None` at the first already-consumed
/// inline, after those 32 inlines, or at the end of the input.
fn find_matching_close(
    inlines: &[Inline],
    open_idx: usize,
    quote: char,
    consumed: &[bool],
) -> Option<usize> {
    let boundary_after = |idx: usize| match inlines.get(idx + 1) {
        None | Some(Inline::Space | Inline::SoftBreak | Inline::LineBreak) => true,
        Some(Inline::Str(next)) => next.chars().next().is_some_and(|c| !c.is_alphanumeric()),
        _ => false,
    };
    let closes_at = |idx: usize| match &inlines[idx] {
        Inline::Str(text) => text.ends_with(quote) && boundary_after(idx),
        _ => false,
    };

    // Quotation opened and closed inside one `Str`, e.g. `"word"`.
    if let Inline::Str(opening) = &inlines[open_idx]
        && opening.chars().count() >= 3
        && closes_at(open_idx)
    {
        return Some(open_idx);
    }

    // Look at most 32 inlines past the opener.
    let search_end = inlines.len().min(open_idx + 33);
    for candidate in open_idx + 1..search_end {
        if consumed[candidate] {
            return None;
        }
        if closes_at(candidate) {
            return Some(candidate);
        }
    }
    None
}
/// Builds a deep copy of `inline` by reconstructing the matching variant.
///
/// Nested inline lists are copied element by element through `clone_inline`,
/// note bodies through `clone_block`, and each `Citation` inside a `Cite` is
/// rebuilt field by field.
fn clone_inline(inline: &Inline) -> Inline {
match inline {
Inline::Str(s) => Inline::Str(s.clone()),
Inline::Space => Inline::Space,
Inline::SoftBreak => Inline::SoftBreak,
Inline::LineBreak => Inline::LineBreak,
Inline::Emph(c) => Inline::Emph(c.iter().map(clone_inline).collect()),
Inline::Strong(c) => Inline::Strong(c.iter().map(clone_inline).collect()),
Inline::Strikeout(c) => Inline::Strikeout(c.iter().map(clone_inline).collect()),
Inline::Superscript(c) => Inline::Superscript(c.iter().map(clone_inline).collect()),
Inline::Subscript(c) => Inline::Subscript(c.iter().map(clone_inline).collect()),
Inline::Code(a, s) => Inline::Code(a.clone(), s.clone()),
Inline::Link(a, t, u, ti) => Inline::Link(
a.clone(),
t.iter().map(clone_inline).collect(),
u.clone(),
ti.clone(),
),
Inline::Image(a, t, u, ti) => Inline::Image(
a.clone(),
t.iter().map(clone_inline).collect(),
u.clone(),
ti.clone(),
),
Inline::Math(k, c) => Inline::Math(k, c.clone()),
Inline::Span(a, c) => Inline::Span(a.clone(), c.iter().map(clone_inline).collect()),
Inline::RawInline(f, c) => Inline::RawInline(f.clone(), c.clone()),
Inline::Quoted(k, c) => Inline::Quoted(k, c.iter().map(clone_inline).collect()),
Inline::Note(blocks) => Inline::Note(blocks.iter().map(clone_block).collect()),
Inline::Cite(citations, text) => Inline::Cite(
citations
.iter()
.map(|c| Citation {
id: c.id.clone(),
prefix: c.prefix.iter().map(clone_inline).collect(),
suffix: c.suffix.iter().map(clone_inline).collect(),
mode: c.mode,
note_num: c.note_num,
hash: c.hash,
})
.collect(),
text.iter().map(clone_inline).collect(),
),
Inline::Unsupported(s) => Inline::Unsupported(s.clone()),
}
}
/// Builds a deep copy of `b` by reconstructing the matching variant.
///
/// Nested blocks and inlines are copied through `clone_block` and
/// `clone_inline`. Note that a `Table` is NOT copied: it is replaced by
/// `Block::Unsupported("Table")`.
fn clone_block(b: &Block) -> Block {
match b {
Block::Para(c) => Block::Para(c.iter().map(clone_inline).collect()),
Block::Plain(c) => Block::Plain(c.iter().map(clone_inline).collect()),
Block::Header(lvl, a, c) => {
Block::Header(*lvl, a.clone(), c.iter().map(clone_inline).collect())
}
Block::BlockQuote(blocks) => Block::BlockQuote(blocks.iter().map(clone_block).collect()),
Block::CodeBlock(a, s) => Block::CodeBlock(a.clone(), s.clone()),
Block::HorizontalRule => Block::HorizontalRule,
Block::BulletList(items) => Block::BulletList(
items
.iter()
.map(|item| item.iter().map(clone_block).collect())
.collect(),
),
Block::OrderedList(start, style, delim, items) => Block::OrderedList(
*start,
style,
delim,
items
.iter()
.map(|item| item.iter().map(clone_block).collect())
.collect(),
),
Block::RawBlock(f, c) => Block::RawBlock(f.clone(), c.clone()),
// NOTE(review): the table payload is dropped here rather than copied, so a
// cloned table degrades to an `Unsupported` marker — confirm this is intended.
Block::Table(_) => Block::Unsupported("Table".to_string()),
Block::Div(a, blocks) => Block::Div(a.clone(), blocks.iter().map(clone_block).collect()),
Block::LineBlock(lines) => Block::LineBlock(
lines
.iter()
.map(|line| line.iter().map(clone_inline).collect())
.collect(),
),
Block::DefinitionList(items) => Block::DefinitionList(
items
.iter()
.map(|(term, defs)| {
(
term.iter().map(clone_inline).collect(),
defs.iter()
.map(|d| d.iter().map(clone_block).collect())
.collect(),
)
})
.collect(),
),
Block::Figure(a, caption, body) => Block::Figure(
a.clone(),
caption.iter().map(clone_block).collect(),
body.iter().map(clone_block).collect(),
),
Block::Unsupported(s) => Block::Unsupported(s.clone()),
}
}
/// Applies typographic replacements to `s`, scanning left to right:
/// `---` becomes an em dash (U+2014), `--` an en dash (U+2013) and `...` an
/// ellipsis (U+2026). All other characters are copied unchanged.
fn smart_dashes_and_ellipsis(s: &str) -> String {
    if !s.contains(['-', '.']) {
        return s.to_string();
    }
    // Order matters: the three-dash rule must be tried before the two-dash one.
    const RULES: [(&str, char); 3] = [
        ("---", '\u{2014}'),
        ("--", '\u{2013}'),
        ("...", '\u{2026}'),
    ];
    let mut out = String::with_capacity(s.len());
    let mut pos = 0usize;
    'scan: while pos < s.len() {
        let rest = &s[pos..];
        for (pattern, replacement) in RULES {
            if rest.starts_with(pattern) {
                out.push(replacement);
                pos += pattern.len();
                continue 'scan;
            }
        }
        // No rule matched: copy exactly one (possibly multi-byte) character.
        let width = utf8_char_len(s.as_bytes()[pos]);
        out.push_str(&s[pos..pos + width]);
        pos += width;
    }
    out
}
/// Maps a byte to a length between 1 and 4 by value range: below `0xC0` → 1,
/// below `0xE0` → 2, below `0xF0` → 3, otherwise 4. Used on the first byte of
/// a UTF-8 encoded character to obtain that character's byte length.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0xbf => 1,
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
/// Replaces every straight apostrophe that sits directly between two word
/// characters (per `is_word_char`) with a right single quotation mark
/// (U+2019). All other characters, including other apostrophes, are kept.
fn smart_intraword_apostrophe(s: &str) -> String {
    if !s.contains('\'') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    // `previous` always holds the original (unreplaced) preceding character.
    let mut previous: Option<char> = None;
    let mut upcoming = s.chars().peekable();
    while let Some(ch) = upcoming.next() {
        let between_words = ch == '\''
            && previous.is_some_and(is_word_char)
            && upcoming.peek().copied().is_some_and(is_word_char);
        out.push(if between_words { '\u{2019}' } else { ch });
        previous = Some(ch);
    }
    out
}
/// A word character is any character for which `char::is_alphanumeric`
/// holds.
fn is_word_char(c: char) -> bool {
    char::is_alphanumeric(c)
}
/// Flattens inlines into plain text.
///
/// `Str`, `Code` and `Math` contribute their text; `Space`, `SoftBreak` and
/// `LineBreak` each contribute one space; container inlines (emphasis-like
/// variants, `Span`, `Quoted`, link/image text, the text of a `Cite`)
/// contribute the plain text of their children. `RawInline`, `Note` and
/// `Unsupported` contribute nothing.
fn inlines_to_plaintext(inlines: &[Inline]) -> String {
    /// Appends the plain text of `items` to `buf`, recursing into containers.
    fn append(items: &[Inline], buf: &mut String) {
        for item in items {
            match item {
                Inline::Str(text) => buf.push_str(text),
                Inline::Code(_, code) => buf.push_str(code),
                Inline::Math(_, source) => buf.push_str(source),
                Inline::Space | Inline::SoftBreak | Inline::LineBreak => buf.push(' '),
                Inline::Emph(children)
                | Inline::Strong(children)
                | Inline::Strikeout(children)
                | Inline::Superscript(children)
                | Inline::Subscript(children) => append(children, buf),
                Inline::Link(_, label, _, _) => append(label, buf),
                Inline::Image(_, alt, _, _) => append(alt, buf),
                Inline::Span(_, children) => append(children, buf),
                Inline::Quoted(_, children) => append(children, buf),
                Inline::Cite(_, text) => append(text, buf),
                Inline::RawInline(_, _) | Inline::Note(_) | Inline::Unsupported(_) => {}
            }
        }
    }

    let mut plain = String::new();
    append(inlines, &mut plain);
    plain
}
/// Turns heading text into an identifier slug.
///
/// Characters are lowercased; only alphanumerics, `_`, `-` and `.` are kept.
/// Whitespace becomes a single `-`, except at the very start or directly
/// after a `-`. Trailing `-` characters are removed from the result.
fn pandoc_slugify(text: &str) -> String {
    let mut slug = String::new();
    let mut last_was_dash = false;
    for ch in text.chars() {
        if ch.is_whitespace() {
            let needs_dash = !(slug.is_empty() || last_was_dash);
            if needs_dash {
                slug.push('-');
                last_was_dash = true;
            }
        } else {
            let kept = ch
                .to_lowercase()
                .filter(|&c| c.is_alphanumeric() || matches!(c, '_' | '-' | '.'));
            for lowered in kept {
                slug.push(lowered);
                last_was_dash = lowered == '-';
            }
        }
    }
    let trimmed_len = slug.trim_end_matches('-').len();
    slug.truncate(trimmed_len);
    slug
}
impl Attr {
    /// Creates an attribute set that carries only the identifier `id`, with
    /// no classes and no key/value pairs.
    fn with_id(id: String) -> Self {
        let (classes, kvs) = (Vec::new(), Vec::new());
        Self { id, classes, kvs }
    }
}
/// Appends the textual rendering of block `b` to `out`.
///
/// Each variant is written as its constructor name followed by its fields,
/// with list elements separated by `,` and padded with single spaces inside
/// the brackets (e.g. `Para [ … ]`). The layout looks like pandoc's native
/// output format — presumably the result is compared against it; confirm
/// before changing any spacing.
fn write_block(b: &Block, out: &mut String) {
match b {
Block::Para(inlines) => {
out.push_str("Para [");
write_inline_list(inlines, out);
out.push_str(" ]");
}
Block::Plain(inlines) => {
out.push_str("Plain [");
write_inline_list(inlines, out);
out.push_str(" ]");
}
Block::Header(level, attr, inlines) => {
out.push_str(&format!("Header {level} ("));
write_attr(attr, out);
out.push_str(") [");
write_inline_list(inlines, out);
out.push_str(" ]");
}
Block::BlockQuote(blocks) => {
out.push_str("BlockQuote [");
write_block_list(blocks, out);
out.push_str(" ]");
}
Block::CodeBlock(attr, content) => {
out.push_str("CodeBlock (");
write_attr(attr, out);
out.push_str(") ");
write_haskell_string(content, out);
}
Block::HorizontalRule => out.push_str("HorizontalRule"),
// List items are each a bracketed block list, separated by commas.
Block::BulletList(items) => {
out.push_str("BulletList [");
for (i, item) in items.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(" [");
write_block_list(item, out);
out.push_str(" ]");
}
out.push_str(" ]");
}
Block::OrderedList(start, style, delim, items) => {
out.push_str(&format!("OrderedList ( {start} , {style} , {delim} ) ["));
for (i, item) in items.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(" [");
write_block_list(item, out);
out.push_str(" ]");
}
out.push_str(" ]");
}
Block::RawBlock(format, content) => {
out.push_str("RawBlock ( Format ");
write_haskell_string(format, out);
out.push_str(" ) ");
write_haskell_string(content, out);
}
Block::Table(data) => {
write_table(data, out);
}
Block::Div(attr, blocks) => {
out.push_str("Div (");
write_attr(attr, out);
out.push_str(") [");
write_block_list(blocks, out);
out.push_str(" ]");
}
// Each line of a line block is a bracketed inline list.
Block::LineBlock(lines) => {
out.push_str("LineBlock [");
for (i, line) in lines.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(" [");
write_inline_list(line, out);
out.push_str(" ]");
}
out.push_str(" ]");
}
// Each entry is a tuple `( [term inlines] , [ [def blocks] , … ] )`.
Block::DefinitionList(items) => {
out.push_str("DefinitionList [");
for (i, (term, defs)) in items.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(" ( [");
write_inline_list(term, out);
out.push_str(" ] , [");
for (j, def) in defs.iter().enumerate() {
if j > 0 {
out.push(',');
}
out.push_str(" [");
write_block_list(def, out);
out.push_str(" ]");
}
out.push_str(" ] )");
}
out.push_str(" ]");
}
// The caption is always written with `Nothing` as its first field.
Block::Figure(attr, caption, body) => {
out.push_str("Figure (");
write_attr(attr, out);
out.push_str(") ( Caption Nothing [");
write_block_list(caption, out);
out.push_str(" ] ) [");
write_block_list(body, out);
out.push_str(" ]");
}
// The name is written with `Debug` formatting, i.e. as a quoted string.
Block::Unsupported(name) => {
out.push_str(&format!("Unsupported {name:?}"));
}
}
}
/// Appends the `Table (...)` rendering of `data` to `out`.
///
/// Pieces are written in this order: the table attributes, the caption, one
/// column spec per entry of `data.aligns`, the head rows, a single
/// `TableBody` containing the body rows, and the foot rows. The head, body
/// and foot sections are always written with an empty attribute triple, and
/// the body is always written with `RowHeadColumns 0` followed by an empty
/// list.
fn write_table(data: &TableData, out: &mut String) {
    out.push_str("Table (");
    write_attr(&data.attr, out);

    // A non-empty caption is wrapped in one `Plain` block; an empty caption
    // leaves the caption's block list empty.
    out.push_str(") ( Caption Nothing [");
    if !data.caption.is_empty() {
        out.push_str(" Plain [");
        write_inline_list(&data.caption, out);
        out.push_str(" ]");
    }
    out.push_str(" ] ) [");

    // One `( align , width )` pair per alignment. A column whose width entry
    // is missing or `None` is written as `ColWidthDefault`.
    for (col, align) in data.aligns.iter().enumerate() {
        if col != 0 {
            out.push(',');
        }
        let width = match data.widths.get(col).copied().flatten() {
            Some(w) => format!("ColWidth {}", show_double(w)),
            None => String::from("ColWidthDefault"),
        };
        out.push_str(&format!(" ( {align} , {width} )"));
    }

    out.push_str(" ] ( TableHead ( \"\" , [ ] , [ ] ) [");
    for (idx, row) in data.head_rows.iter().enumerate() {
        out.push_str(if idx == 0 { " " } else { ", " });
        write_table_row(row, out);
    }

    out.push_str(" ] ) [ TableBody ( \"\" , [ ] , [ ] ) ( RowHeadColumns 0 ) [ ] [");
    for (idx, row) in data.body_rows.iter().enumerate() {
        out.push_str(if idx == 0 { " " } else { ", " });
        write_table_row(row, out);
    }

    out.push_str(" ] ] ( TableFoot ( \"\" , [ ] , [ ] ) [");
    for (idx, row) in data.foot_rows.iter().enumerate() {
        out.push_str(if idx == 0 { " " } else { ", " });
        write_table_row(row, out);
    }
    out.push_str(" ] )");
}
/// Appends one table row to `out` as `Row ( "" , [ ] , [ ] ) [ <cells> ]`.
///
/// Every cell is written with an empty attribute triple and `AlignDefault`,
/// followed by its row span, column span and block content.
fn write_table_row(cells: &[GridCell], out: &mut String) {
    out.push_str("Row ( \"\" , [ ] , [ ] ) [");
    let mut first = true;
    for cell in cells {
        if !first {
            out.push(',');
        }
        first = false;

        let opening = format!(
            " Cell ( \"\" , [ ] , [ ] ) AlignDefault ( RowSpan {} ) ( ColSpan {} ) [",
            cell.row_span, cell.col_span
        );
        out.push_str(&opening);
        // `write_block_list` writes nothing for an empty slice, so an empty
        // cell needs no special case here.
        write_block_list(&cell.blocks, out);
        out.push_str(" ]");
    }
    out.push_str(" ]");
}
/// Appends each block in `blocks` to `out`, comma-separated.
///
/// The first block is preceded by a single space and every later block by
/// `", "`. Nothing is written for an empty slice, and no enclosing brackets
/// are written here.
fn write_block_list(blocks: &[Block], out: &mut String) {
    let mut separator = " ";
    for block in blocks {
        out.push_str(separator);
        write_block(block, out);
        separator = ", ";
    }
}
/// Appends each inline in `inlines` to `out`, comma-separated.
///
/// The first inline is preceded by a single space and every later inline by
/// `", "`. Nothing is written for an empty slice, and no enclosing brackets
/// are written here.
fn write_inline_list(inlines: &[Inline], out: &mut String) {
    let Some((head, tail)) = inlines.split_first() else {
        return;
    };
    out.push(' ');
    write_inline(head, out);
    for inline in tail {
        out.push_str(", ");
        write_inline(inline, out);
    }
}
fn write_inline(inline: &Inline, out: &mut String) {
match inline {
Inline::Str(s) => {
out.push_str("Str ");
write_haskell_string(s, out);
}
Inline::Space => out.push_str("Space"),
Inline::SoftBreak => out.push_str("SoftBreak"),
Inline::LineBreak => out.push_str("LineBreak"),
Inline::Emph(children) => {
out.push_str("Emph [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Strong(children) => {
out.push_str("Strong [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Strikeout(children) => {
out.push_str("Strikeout [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Superscript(children) => {
out.push_str("Superscript [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Subscript(children) => {
out.push_str("Subscript [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Code(attr, content) => {
out.push_str("Code (");
write_attr(attr, out);
out.push_str(") ");
write_haskell_string(content, out);
}
Inline::Link(attr, text, url, title) => {
out.push_str("Link (");
write_attr(attr, out);
out.push_str(") [");
write_inline_list(text, out);
out.push_str(" ] ( ");
write_haskell_string(url, out);
out.push_str(" , ");
write_haskell_string(title, out);
out.push_str(" )");
}
Inline::Image(attr, alt, url, title) => {
out.push_str("Image (");
write_attr(attr, out);
out.push_str(") [");
write_inline_list(alt, out);
out.push_str(" ] ( ");
write_haskell_string(url, out);
out.push_str(" , ");
write_haskell_string(title, out);
out.push_str(" )");
}
Inline::Math(kind, content) => {
out.push_str("Math ");
out.push_str(kind);
out.push(' ');
write_haskell_string(content, out);
}
Inline::Span(attr, children) => {
out.push_str("Span (");
write_attr(attr, out);
out.push_str(") [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::RawInline(format, content) => {
out.push_str("RawInline ( Format ");
write_haskell_string(format, out);
out.push_str(" ) ");
write_haskell_string(content, out);
}
Inline::Quoted(kind, children) => {
out.push_str("Quoted ");
out.push_str(kind);
out.push_str(" [");
write_inline_list(children, out);
out.push_str(" ]");
}
Inline::Note(blocks) => {
out.push_str("Note [");
write_block_list(blocks, out);
out.push_str(" ]");
}
Inline::Cite(citations, text) => {
out.push_str("Cite [");
for (i, c) in citations.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(" Citation { citationId = ");
write_haskell_string(&c.id, out);
out.push_str(" , citationPrefix = [");
write_inline_list(&c.prefix, out);
out.push_str(" ] , citationSuffix = [");
write_inline_list(&c.suffix, out);
out.push_str(" ] , citationMode = ");
out.push_str(match c.mode {
CitationMode::AuthorInText => "AuthorInText",
CitationMode::NormalCitation => "NormalCitation",
CitationMode::SuppressAuthor => "SuppressAuthor",
});
out.push_str(&format!(
" , citationNoteNum = {} , citationHash = {} }}",
c.note_num, c.hash
));
}
out.push_str(" ] [");
write_inline_list(text, out);
out.push_str(" ]");
}
Inline::Unsupported(name) => {
out.push_str(&format!("Unsupported {name:?}"));
}
}
}
/// Appends the body of an attribute triple to `out`: the id, the class list
/// and the key/value list, separated by `" , "`.
///
/// The enclosing parentheses are not written here. The output starts and ends
/// with a single space so it can be placed directly between `(` and `)`. An
/// empty class or key/value list is written as `[]`; a non-empty one gets a
/// space after `[` and before `]`.
fn write_attr(attr: &Attr, out: &mut String) {
    out.push(' ');
    write_haskell_string(&attr.id, out);

    out.push_str(" , [");
    let mut wrote_class = false;
    for class in attr.classes.iter() {
        out.push_str(if wrote_class { ", " } else { " " });
        write_haskell_string(class, out);
        wrote_class = true;
    }
    if wrote_class {
        out.push(' ');
    }

    out.push_str("] , [");
    let mut wrote_pair = false;
    for (key, value) in attr.kvs.iter() {
        out.push_str(if wrote_pair { ", ( " } else { " ( " });
        write_haskell_string(key, out);
        out.push_str(" , ");
        write_haskell_string(value, out);
        out.push_str(" )");
        wrote_pair = true;
    }
    if wrote_pair {
        out.push(' ');
    }

    out.push_str("] ");
}
/// Appends `s` to `out` as a double-quoted, backslash-escaped string literal.
///
/// * `"` and `\` are backslash-escaped; newline, tab and carriage return are
///   written as `\n`, `\t` and `\r`.
/// * Any other character in the printable ASCII range (space through `~`) is
///   copied unchanged.
/// * Everything else (remaining control characters, DEL and all non-ASCII
///   characters) is written as a backslash followed by the decimal code point.
/// * When an ASCII digit directly follows such a decimal escape, `\&` is
///   written between them so the digit is not read as part of the escape.
fn write_haskell_string(s: &str, out: &mut String) {
    out.push('"');
    // True only while the most recently written piece was a decimal escape.
    let mut after_decimal_escape = false;
    for ch in s.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            _ => None,
        };

        if let Some(escape) = short_escape {
            out.push_str(escape);
            after_decimal_escape = false;
        } else if (' '..='~').contains(&ch) {
            if after_decimal_escape && ch.is_ascii_digit() {
                out.push_str("\\&");
            }
            out.push(ch);
            after_decimal_escape = false;
        } else {
            out.push('\\');
            out.push_str(&u32::from(ch).to_string());
            after_decimal_escape = true;
        }
    }
    out.push('"');
}