1use std::borrow::Cow;
14use std::collections::BTreeMap;
15use std::mem;
16
17use carta_ast::{
18 Alignment, Attr, Block, Caption, Cell, ColSpec, ColWidth, Document, Format, Inline,
19 ListAttributes, ListNumberDelim, ListNumberStyle, MathType, MetaValue, QuoteType, Row, Table,
20 TableBody, TableFoot, TableHead, Text, slug, slug_gfm,
21};
22use carta_core::{Extension, Extensions, Reader, ReaderOptions, Result};
23
24use crate::heading_ids::{IdRegistry, IdScheme, fold_to_ascii};
25
/// Reader that turns Org-mode source text into a [`Document`].
///
/// The type is a stateless unit struct; parsing is driven by its [`Reader`] implementation.
#[derive(Clone, Copy, Debug, Default)]
pub struct OrgReader;
33
34impl Reader for OrgReader {
35 fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document> {
36 let ext = options.extensions;
37 let normalized = normalize(input);
38 let lines: Vec<&str> = normalized.split('\n').collect();
39
40 let (body_lines, defs) = collect_footnotes(&lines);
41
42 let empty_notes: BTreeMap<String, Vec<Block>> = BTreeMap::new();
45 let mut notes: BTreeMap<String, Vec<Block>> = BTreeMap::new();
46 for (label, text) in &defs {
47 let def_lines: Vec<&str> = text.split('\n').collect();
48 let mut throwaway_ids = new_id_registry();
49 let mut throwaway_meta = BTreeMap::new();
50 let blocks = parse_blocks(
51 &def_lines,
52 ext,
53 &empty_notes,
54 &mut throwaway_ids,
55 &mut throwaway_meta,
56 );
57 notes.insert(label.clone(), blocks);
58 }
59
60 let mut ids = new_id_registry();
61 let mut meta: BTreeMap<Text, MetaValue> = BTreeMap::new();
62 let blocks = parse_blocks(&body_lines, ext, ¬es, &mut ids, &mut meta);
63
64 Ok(Document {
65 meta,
66 blocks,
67 ..Document::default()
68 })
69 }
70}
71
/// Converts `\r\n` and lone `\r` line endings to `\n`.
///
/// Input without any carriage return is returned borrowed, so no allocation happens in
/// that case.
fn normalize(input: &str) -> Cow<'_, str> {
    if !input.contains('\r') {
        return Cow::Borrowed(input);
    }
    // Collapse CRLF pairs first so the second pass only sees stray carriage returns.
    let without_crlf = input.replace("\r\n", "\n");
    Cow::Owned(without_crlf.replace('\r', "\n"))
}
81
82fn collect_footnotes<'a>(lines: &[&'a str]) -> (Vec<&'a str>, Vec<(String, String)>) {
89 let mut body = Vec::new();
90 let mut defs = Vec::new();
91 let mut i = 0;
92 while let Some(line) = lines.get(i) {
93 if let Some((label, first)) = footnote_definition(line) {
94 let mut collected = vec![first];
95 i += 1;
96 while let Some(next) = lines.get(i) {
97 if footnote_definition(next).is_some() || headline_level(next).is_some() {
98 break;
99 }
100 if next.trim().is_empty()
101 && lines
102 .get(i + 1)
103 .is_none_or(|following| following.trim().is_empty())
104 {
105 break;
106 }
107 collected.push((*next).to_owned());
108 i += 1;
109 }
110 defs.push((label, collected.join("\n")));
111 } else {
112 body.push(*line);
113 i += 1;
114 }
115 }
116 (body, defs)
117}
118
119fn footnote_definition(line: &str) -> Option<(String, String)> {
122 let rest = line.strip_prefix("[fn:")?;
123 let close = rest.find(']')?;
124 let label = &rest[..close];
125 if label.is_empty() || !label.chars().all(is_footnote_label_char) {
126 return None;
127 }
128 let after = rest.get(close + 1..).unwrap_or("");
129 Some((label.to_owned(), after.trim_start().to_owned()))
130}
131
/// Returns `true` for characters allowed in a footnote label: ASCII letters, ASCII digits,
/// `_` and `-`.
fn is_footnote_label_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-')
}
135
136fn new_id_registry() -> IdRegistry {
141 let mut ids = IdRegistry::default();
142 ids.reserve_native("section");
143 ids
144}
145
146fn assign_id(ids: &mut IdRegistry, text: &str, ext: Extensions) -> String {
150 let Some(scheme) = IdScheme::select(ext, true) else {
151 return String::new();
152 };
153 let folded;
154 let source = if ext.contains(Extension::AsciiIdentifiers) {
155 folded = fold_to_ascii(text);
156 folded.as_str()
157 } else {
158 text
159 };
160 let base = match scheme {
161 IdScheme::Plain => slug(source),
162 IdScheme::Gfm => slug_gfm(source),
163 };
164 ids.assign_native(base)
165}
166
167#[derive(Default)]
171struct Affiliated {
172 caption: Option<Vec<Inline>>,
173 name: Option<String>,
174}
175
176impl Affiliated {
177 fn is_empty(&self) -> bool {
178 self.caption.is_none() && self.name.is_none()
179 }
180}
181
182#[allow(clippy::too_many_lines)]
183fn parse_blocks(
184 lines: &[&str],
185 ext: Extensions,
186 notes: &BTreeMap<String, Vec<Block>>,
187 ids: &mut IdRegistry,
188 meta: &mut BTreeMap<Text, MetaValue>,
189) -> Vec<Block> {
190 let mut out = Vec::new();
191 let mut pending = Affiliated::default();
192 let mut i = 0;
193 while let Some(&line) = lines.get(i) {
194 if line.trim().is_empty() {
195 i += 1;
196 continue;
197 }
198 if let Some(level) = headline_level(line) {
200 i += 1;
201 let mut id_override = None;
202 if let Some((custom_id, skip)) = read_property_drawer(lines, i) {
203 id_override = custom_id;
204 i += skip;
205 }
206 out.push(build_headline(line, level, id_override, ext, notes, ids));
207 pending = Affiliated::default();
208 continue;
209 }
210 if let Some(name) = greater_block_open(line) {
212 let (block, consumed) = parse_greater_block(lines, i, &name, ext, notes, ids, meta);
213 i += consumed;
214 if let Some(block) = block {
215 out.push(apply_affiliated(block, &mut pending));
216 }
217 continue;
218 }
219 if let Some((key, value)) = keyword_line(line) {
221 handle_keyword(&key, &value, line, ext, notes, meta, &mut pending, &mut out);
222 i += 1;
223 continue;
224 }
225 if line.trim_start() == "#" || line.trim_start().starts_with("# ") {
227 i += 1;
228 continue;
229 }
230 if is_horizontal_rule(line) {
232 out.push(Block::HorizontalRule);
233 i += 1;
234 pending = Affiliated::default();
235 continue;
236 }
237 if is_fixed_width(line) {
239 let (text, consumed) = collect_fixed_width(lines, i);
240 out.push(Block::CodeBlock(Box::default(), text.into()));
241 i += consumed;
242 pending = Affiliated::default();
243 continue;
244 }
245 if let Some(name) = drawer_open(line) {
247 let (inner, consumed) = collect_drawer(lines, i);
248 i += consumed;
249 if name.eq_ignore_ascii_case("PROPERTIES") || name.eq_ignore_ascii_case("LOGBOOK") {
252 pending = Affiliated::default();
253 continue;
254 }
255 let body = parse_blocks(&inner, ext, notes, ids, meta);
256 let attr = Attr {
257 classes: vec![name.into(), "drawer".into()],
258 ..Attr::default()
259 };
260 out.push(Block::Div(Box::new(attr), body));
261 pending = Affiliated::default();
262 continue;
263 }
264 if is_table_line(line) {
266 let (rows, consumed) = collect_table(lines, i);
267 let table = build_table(&rows, ext, notes, &mut pending);
268 out.push(table);
269 i += consumed;
270 continue;
271 }
272 if list_marker(line).is_some() {
274 let (block, consumed) = parse_list(lines, i, ext, notes, ids, meta);
275 i += consumed;
276 if let Some(block) = block {
277 out.push(block);
278 }
279 pending = Affiliated::default();
280 continue;
281 }
282 let start = i;
285 i += 1;
286 while let Some(&l) = lines.get(i) {
287 if l.trim().is_empty() || opens_block(l) {
288 break;
289 }
290 i += 1;
291 }
292 let text = lines
293 .get(start..i)
294 .unwrap_or(&[])
295 .iter()
296 .map(|l| l.trim())
297 .collect::<Vec<_>>()
298 .join("\n");
299 let para = Block::Para(parse_inlines(&text, ext, notes));
300 out.push(apply_affiliated(para, &mut pending));
301 }
302 out
303}
304
305fn opens_block(line: &str) -> bool {
307 headline_level(line).is_some()
308 || greater_block_open(line).is_some()
309 || keyword_line(line).is_some()
310 || line.trim_start() == "#"
311 || line.trim_start().starts_with("# ")
312 || is_horizontal_rule(line)
313 || is_fixed_width(line)
314 || drawer_open(line).is_some()
315 || is_table_line(line)
316 || list_marker(line).is_some()
317}
318
319fn apply_affiliated(block: Block, pending: &mut Affiliated) -> Block {
322 if pending.is_empty() {
323 return block;
324 }
325 let Affiliated { caption, name } = mem::take(pending);
326 match block {
327 Block::Para(inlines) if is_lone_image(&inlines) => {
328 let attr = Attr {
329 id: name.unwrap_or_default().into(),
330 ..Attr::default()
331 };
332 let long = caption.map(|c| vec![Block::Plain(c)]).unwrap_or_default();
333 Block::Figure(
334 Box::new(attr),
335 Box::new(Caption { short: None, long }),
336 vec![Block::Plain(inlines)],
337 )
338 }
339 Block::CodeBlock(mut attr, text) => {
340 if let Some(name) = name {
341 attr.id = name.into();
342 }
343 Block::CodeBlock(attr, text)
344 }
345 other => other,
346 }
347}
348
349fn is_lone_image(inlines: &[Inline]) -> bool {
350 matches!(inlines, [Inline::Image(..)])
351}
352
/// Returns the headline level of `line`: the number of `*` at column zero, provided at
/// least one is present and the run is immediately followed by a space.
fn headline_level(line: &str) -> Option<usize> {
    let stars = line.bytes().take_while(|&b| b == b'*').count();
    let space_follows = line.as_bytes().get(stars) == Some(&b' ');
    (stars > 0 && space_follows).then_some(stars)
}
365
366fn build_headline(
369 line: &str,
370 level: usize,
371 id_override: Option<String>,
372 ext: Extensions,
373 notes: &BTreeMap<String, Vec<Block>>,
374 ids: &mut IdRegistry,
375) -> Block {
376 let rest = line.get(level..).unwrap_or("").trim();
377
378 let (todo, rest) = split_todo_keyword(rest);
379 let (title_text, tags) = split_tags(rest);
380
381 let title_inlines = parse_inlines(title_text, ext, notes);
382
383 let id = if let Some(custom) = id_override {
384 ids.reserve_native(&custom);
385 custom
386 } else {
387 assign_id(ids, &carta_ast::to_plain_text(&title_inlines), ext)
388 };
389
390 let mut inlines = Vec::new();
391 if let Some(keyword) = todo {
392 inlines.push(todo_span(keyword));
393 inlines.push(Inline::Space);
394 }
395 inlines.extend(title_inlines);
396 if !tags.is_empty() {
397 inlines.push(Inline::Space);
398 for (n, tag) in tags.iter().enumerate() {
399 if n > 0 {
400 inlines.push(Inline::Str("\u{a0}".into()));
401 }
402 inlines.push(tag_span(tag));
403 }
404 }
405
406 let attr = Attr {
407 id: id.into(),
408 ..Attr::default()
409 };
410 let level = i32::try_from(level).unwrap_or(6).clamp(1, 6);
411 Block::Header(level, Box::new(attr), inlines)
412}
413
414fn todo_span(keyword: &str) -> Inline {
415 let state = if keyword == "DONE" { "done" } else { "todo" };
416 let attr = Attr {
417 classes: vec![state.into(), keyword.into()],
418 ..Attr::default()
419 };
420 Inline::Span(Box::new(attr), vec![Inline::Str(keyword.into())])
421}
422
423fn tag_span(tag: &str) -> Inline {
424 let attr = Attr {
425 classes: vec!["tag".into()],
426 attributes: vec![("tag-name".into(), tag.into())],
427 ..Attr::default()
428 };
429 Inline::Span(
430 Box::new(attr),
431 vec![Inline::SmallCaps(vec![Inline::Str(tag.into())])],
432 )
433}
434
/// Splits a leading `TODO` or `DONE` keyword off a headline's text.
///
/// The keyword only counts when it is the whole text or is followed by a space. Returns
/// the keyword (if any) and the remaining text with leading whitespace removed; without a
/// keyword the text is returned unchanged.
fn split_todo_keyword(rest: &str) -> (Option<&str>, &str) {
    for keyword in ["TODO", "DONE"] {
        match rest.strip_prefix(keyword) {
            Some(after) if after.is_empty() || after.starts_with(' ') => {
                return (Some(keyword), after.trim_start());
            }
            _ => {}
        }
    }
    (None, rest)
}
447
/// Splits trailing `:tag1:tag2:` tags off a headline title.
///
/// The tag group must be the last whitespace-separated word of `rest`, start and end with
/// `:`, and contain only alphanumeric characters, `_`, `@`, `#`, `%` and the separating
/// `:`. Alphanumeric is meant in the Unicode sense, so tags such as `:café:` are accepted.
///
/// Returns the title with the tag group and the whitespace before it removed, plus the
/// tags in order. When no valid tag group is present, `rest` is returned unchanged with an
/// empty tag list.
fn split_tags(rest: &str) -> (&str, Vec<String>) {
    let trimmed = rest.trim_end();
    if !trimmed.ends_with(':') {
        return (rest, Vec::new());
    }
    // A tag group needs a title (or at least whitespace) in front of it.
    let Some(space) = trimmed.rfind(char::is_whitespace) else {
        return (rest, Vec::new());
    };
    let candidate = trimmed.get(space + 1..).unwrap_or("");
    let Some(inner) = candidate
        .strip_prefix(':')
        .and_then(|without_open| without_open.strip_suffix(':'))
    else {
        return (rest, Vec::new());
    };
    let is_tag_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '@' | '#' | '%' | ':');
    if !inner.chars().all(is_tag_char) {
        return (rest, Vec::new());
    }
    let tags: Vec<String> = inner
        .split(':')
        .filter(|tag| !tag.is_empty())
        .map(str::to_owned)
        .collect();
    if tags.is_empty() {
        return (rest, Vec::new());
    }
    (trimmed.get(..space).unwrap_or("").trim_end(), tags)
}
479
/// Reads a `:PROPERTIES:` … `:END:` drawer starting at `lines[start]`.
///
/// Returns `None` when `lines[start]` is not the drawer opener or when no `:END:` line
/// follows. Otherwise returns the value of the last `:CUSTOM_ID:` property (if any) and
/// the number of lines the drawer occupies, opener and closer included. Marker and key
/// comparisons ignore ASCII case.
fn read_property_drawer(lines: &[&str], start: usize) -> Option<(Option<String>, usize)> {
    let opener = lines.get(start)?;
    if !opener.trim().eq_ignore_ascii_case(":PROPERTIES:") {
        return None;
    }
    let mut custom_id = None;
    for (offset, line) in lines.get(start + 1..)?.iter().enumerate() {
        let entry = line.trim();
        if entry.eq_ignore_ascii_case(":END:") {
            // Opener + `offset` property lines + closer.
            return Some((custom_id, offset + 2));
        }
        let custom = entry
            .strip_prefix(':')
            .and_then(|rest| rest.split_once(':'))
            .filter(|(key, _)| key.eq_ignore_ascii_case("CUSTOM_ID"));
        if let Some((_, value)) = custom {
            custom_id = Some(value.trim().to_owned());
        }
    }
    None
}
505
506fn greater_block_open(line: &str) -> Option<String> {
511 let trimmed = line.trim_start();
512 let rest = strip_prefix_ci(trimmed, "#+begin_")?;
513 let name: String = rest
514 .chars()
515 .take_while(|c| !c.is_whitespace())
516 .collect::<String>();
517 if name.is_empty() { None } else { Some(name) }
518}
519
520#[allow(clippy::too_many_arguments)]
521fn parse_greater_block(
522 lines: &[&str],
523 start: usize,
524 name: &str,
525 ext: Extensions,
526 notes: &BTreeMap<String, Vec<Block>>,
527 ids: &mut IdRegistry,
528 meta: &mut BTreeMap<Text, MetaValue>,
529) -> (Option<Block>, usize) {
530 let open_line = lines.get(start).copied().unwrap_or("");
533 let header_args = strip_prefix_ci(open_line.trim_start(), "#+begin_")
534 .unwrap_or("")
535 .get(name.len()..)
536 .unwrap_or("")
537 .trim();
538
539 let lower = name.to_ascii_lowercase();
540 let end_marker = format!("#+end_{lower}");
541 let mut depth = 1usize;
542 let mut content: Vec<&str> = Vec::new();
543 let mut i = start + 1;
544 while let Some(&line) = lines.get(i) {
545 let t = line.trim_start();
546 if let Some(open) = greater_block_open(line)
547 && open.eq_ignore_ascii_case(name)
548 {
549 depth += 1;
550 }
551 if t.eq_ignore_ascii_case(&end_marker) {
552 depth -= 1;
553 if depth == 0 {
554 i += 1;
555 break;
556 }
557 }
558 content.push(line);
559 i += 1;
560 }
561 let consumed = i - start;
562
563 let block = match lower.as_str() {
564 "src" => {
565 let lang = header_args
566 .split_whitespace()
567 .next()
568 .unwrap_or("")
569 .to_owned();
570 let attr = Attr {
571 classes: if lang.is_empty() {
572 vec![]
573 } else {
574 vec![lang.into()]
575 },
576 ..Attr::default()
577 };
578 Some(Block::CodeBlock(
579 Box::new(attr),
580 dedent_verbatim(&content).into(),
581 ))
582 }
583 "example" => Some(Block::CodeBlock(
584 Box::default(),
585 dedent_verbatim(&content).into(),
586 )),
587 "export" => {
588 let fmt = header_args
589 .split_whitespace()
590 .next()
591 .unwrap_or("")
592 .to_owned();
593 Some(Block::RawBlock(
594 Format(fmt.into()),
595 verbatim(&content).into(),
596 ))
597 }
598 "quote" => Some(Block::BlockQuote(parse_blocks(
599 &content, ext, notes, ids, meta,
600 ))),
601 "verse" => Some(Block::LineBlock(
602 content
603 .iter()
604 .map(|l| parse_inlines(l.trim(), ext, notes))
605 .collect(),
606 )),
607 "comment" => None,
608 _ => {
609 let attr = Attr {
610 classes: vec![name.into()],
611 ..Attr::default()
612 };
613 Some(Block::Div(
614 Box::new(attr),
615 parse_blocks(&content, ext, notes, ids, meta),
616 ))
617 }
618 };
619 (block, consumed)
620}
621
/// Concatenates `lines` unchanged, terminating each one with `\n`.
fn verbatim(lines: &[&str]) -> String {
    lines.iter().flat_map(|line| [*line, "\n"]).collect()
}
631
/// Joins `lines` with trailing `\n`s after removing their common indentation.
///
/// The common indentation is the smallest leading-whitespace width (in bytes) among the
/// non-blank lines. Whitespace-only lines are emitted unchanged and do not take part in
/// that computation.
///
/// If the common width would cut a line in the middle of a multi-byte whitespace
/// character, the cut is moved back to the previous character boundary so the line's
/// content is never lost.
fn dedent_verbatim(lines: &[&str]) -> String {
    let indent = lines
        .iter()
        .filter(|l| !l.trim().is_empty())
        .map(|l| l.len() - l.trim_start().len())
        .min()
        .unwrap_or(0);
    let mut out = String::new();
    for line in lines {
        if line.trim().is_empty() {
            out.push_str(line);
        } else {
            // `indent` never exceeds this line's own whitespace prefix, so `cut` stays
            // inside the line; offset 0 is always a boundary, so the loop terminates.
            let mut cut = indent;
            while !line.is_char_boundary(cut) {
                cut -= 1;
            }
            out.push_str(line.get(cut..).unwrap_or(line));
        }
        out.push('\n');
    }
    out
}
653
654fn keyword_line(line: &str) -> Option<(String, String)> {
659 let trimmed = line.trim_start();
660 let rest = trimmed.strip_prefix("#+")?;
661 let colon = rest.find(':')?;
662 let key = rest.get(..colon)?;
663 if key.is_empty()
664 || !key
665 .chars()
666 .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-'))
667 {
668 return None;
669 }
670 if key.eq_ignore_ascii_case("begin_src")
671 || starts_with_ci(key, "begin_")
672 || starts_with_ci(key, "end_")
673 {
674 return None;
675 }
676 let value = rest.get(colon + 1..).unwrap_or("").trim_start().to_owned();
677 Some((key.to_owned(), value))
678}
679
680#[allow(clippy::too_many_arguments)]
681fn handle_keyword(
682 key: &str,
683 value: &str,
684 line: &str,
685 ext: Extensions,
686 notes: &BTreeMap<String, Vec<Block>>,
687 meta: &mut BTreeMap<Text, MetaValue>,
688 pending: &mut Affiliated,
689 out: &mut Vec<Block>,
690) {
691 let upper = key.to_ascii_uppercase();
692 match upper.as_str() {
693 "TITLE" | "SUBTITLE" | "AUTHOR" | "DATE" | "KEYWORDS" | "DESCRIPTION" => {
694 meta.insert(
695 upper.to_ascii_lowercase().into(),
696 MetaValue::MetaInlines(parse_inlines(value, ext, notes)),
697 );
698 }
699 "LANGUAGE" => {
700 meta.insert("lang".into(), MetaValue::MetaString(value.into()));
701 }
702 "CAPTION" => pending.caption = Some(parse_inlines(value, ext, notes)),
703 "NAME" | "LABEL" => pending.name = Some(value.to_owned()),
704 "OPTIONS" | "TODO" | "SEQ_TODO" | "TYP_TODO" | "PRIORITIES" | "TAGS" | "COLUMNS"
705 | "SETUPFILE" | "CONSTANTS" | "MACRO" | "DRAWERS" | "ARCHIVE" | "RESULTS" | "HEADER"
706 | "PLOT" => {}
707 other if other.starts_with("ATTR_") => {}
708 other if other.starts_with("LATEX_HEADER") => {
709 append_header_include(meta, "latex", value);
710 }
711 other if other.starts_with("HTML_HEAD") => {
712 append_header_include(meta, "html", value);
713 }
714 _ => out.push(Block::RawBlock(
715 Format("org".into()),
716 line.trim_end().into(),
717 )),
718 }
719}
720
721fn append_header_include(meta: &mut BTreeMap<Text, MetaValue>, format: &str, value: &str) {
722 let entry =
723 MetaValue::MetaInlines(vec![Inline::RawInline(Format(format.into()), value.into())]);
724 match meta
725 .entry("header-includes".into())
726 .or_insert_with(|| MetaValue::MetaList(Vec::new()))
727 {
728 MetaValue::MetaList(list) => list.push(entry),
729 slot => *slot = MetaValue::MetaList(vec![entry]),
730 }
731}
732
/// Returns `true` when the line, ignoring surrounding whitespace, is five or more dashes
/// and nothing else.
fn is_horizontal_rule(line: &str) -> bool {
    let dashes = line.trim();
    dashes.len() >= 5 && dashes.bytes().all(|b| b == b'-')
}
739
/// Returns `true` for a fixed-width line: after optional indentation, either a lone `:`
/// or a `:` followed by a space.
fn is_fixed_width(line: &str) -> bool {
    match line.trim_start().strip_prefix(':') {
        Some(after) => after.is_empty() || after.starts_with(' '),
        None => false,
    }
}
744
745fn collect_fixed_width(lines: &[&str], start: usize) -> (String, usize) {
746 let mut text = String::new();
747 let mut i = start;
748 while let Some(&line) = lines.get(i) {
749 if !is_fixed_width(line) {
750 break;
751 }
752 let t = line.trim_start();
753 let content = t
754 .strip_prefix(": ")
755 .or_else(|| t.strip_prefix(':'))
756 .unwrap_or("");
757 text.push_str(content);
758 text.push('\n');
759 i += 1;
760 }
761 (text, i - start)
762}
763
/// Returns the drawer name of a `:NAME:` line.
///
/// Ignoring surrounding whitespace, the line must be a name wrapped in single colons. The
/// name must be non-empty, must not be `END` (in any ASCII case) and may contain only
/// ASCII alphanumerics, `_`, `-`, `@`, `#` and `%`.
fn drawer_open(line: &str) -> Option<String> {
    let name = line.trim().strip_prefix(':')?.strip_suffix(':')?;
    // `:` is not a name character, so names with inner colons are rejected here as well.
    let is_name_char =
        |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '@' | '#' | '%');
    let valid = !name.is_empty()
        && !name.eq_ignore_ascii_case("END")
        && name.chars().all(is_name_char);
    valid.then(|| name.to_owned())
}
779
/// Collects the lines of the drawer opened at `lines[start]`.
///
/// Returns the lines between the opener and the first `:END:` line (matched ignoring
/// surrounding whitespace and ASCII case) together with the number of lines consumed,
/// opener and closer included. Without a closer, every remaining line belongs to the
/// drawer.
fn collect_drawer<'a>(lines: &[&'a str], start: usize) -> (Vec<&'a str>, usize) {
    let after_opener = lines.get(start + 1..).unwrap_or(&[]);
    let closer = after_opener
        .iter()
        .position(|line| line.trim().eq_ignore_ascii_case(":END:"));
    match closer {
        // Opener + `at` inner lines + closer.
        Some(at) => (after_opener.iter().take(at).copied().collect(), at + 2),
        None => (after_opener.to_vec(), after_opener.len() + 1),
    }
}
793
/// Returns `true` when the line, after optional indentation, starts with `|`.
fn is_table_line(line: &str) -> bool {
    line.trim_start().starts_with('|')
}

/// One scanned line of a table.
enum TableRow {
    /// A rule line such as `|---+---|`.
    Separator,
    /// A data row; each cell's text has surrounding whitespace removed.
    Cells(Vec<String>),
}

/// Collects the run of consecutive table lines beginning at `lines[start]`.
///
/// Returns the scanned rows and the number of lines consumed.
fn collect_table(lines: &[&str], start: usize) -> (Vec<TableRow>, usize) {
    let rows: Vec<TableRow> = lines
        .iter()
        .skip(start)
        .take_while(|line| is_table_line(line))
        .map(|line| parse_table_row(line))
        .collect();
    let consumed = rows.len();
    (rows, consumed)
}

/// Classifies one table line as a separator or a data row.
///
/// The outer `|` delimiters are dropped first. A separator consists only of `-`, `+`, `|`,
/// spaces and `:` and contains at least one `-`. Requiring a dash keeps rows made up
/// solely of empty cells (`|   |   |`) as data rows instead of mistaking them for a rule,
/// which would otherwise promote the rows above them to a table header.
///
/// Every other line is split on `|` into trimmed cells.
fn parse_table_row(line: &str) -> TableRow {
    let t = line.trim();
    let inner = t.strip_prefix('|').unwrap_or(t);
    let inner = inner.strip_suffix('|').unwrap_or(inner);
    let is_rule = inner.contains('-')
        && inner
            .chars()
            .all(|c| matches!(c, '-' | '+' | '|' | ' ' | ':'));
    if is_rule {
        return TableRow::Separator;
    }
    let cells = inner.split('|').map(|c| c.trim().to_owned()).collect();
    TableRow::Cells(cells)
}
833
834fn build_table(
835 rows: &[TableRow],
836 ext: Extensions,
837 notes: &BTreeMap<String, Vec<Block>>,
838 pending: &mut Affiliated,
839) -> Block {
840 let mut head_rows: Vec<Vec<String>> = Vec::new();
841 let mut body_rows: Vec<Vec<String>> = Vec::new();
842 let mut seen_separator = false;
843 let mut header_done = false;
844 for row in rows {
845 match row {
846 TableRow::Separator => {
847 if !body_rows.is_empty() {
848 header_done = true;
849 } else if !head_rows.is_empty() {
850 seen_separator = true;
851 }
852 }
853 TableRow::Cells(cells) => {
854 if seen_separator || header_done {
855 body_rows.push(cells.clone());
856 } else {
857 head_rows.push(cells.clone());
858 }
859 }
860 }
861 }
862 if !seen_separator {
864 body_rows.splice(0..0, head_rows.drain(..));
865 }
866
867 let columns = head_rows
868 .iter()
869 .chain(body_rows.iter())
870 .map(Vec::len)
871 .max()
872 .unwrap_or(0);
873
874 let col_specs = (0..columns)
875 .map(|_| ColSpec {
876 align: Alignment::AlignDefault,
877 width: ColWidth::ColWidthDefault,
878 })
879 .collect();
880
881 let to_rows = |cells: &[Vec<String>]| -> Vec<Row> {
882 cells
883 .iter()
884 .map(|row| Row {
885 attr: Attr::default(),
886 cells: (0..columns)
887 .map(|c| build_cell(row.get(c).map_or("", String::as_str), ext, notes))
888 .collect(),
889 })
890 .collect()
891 };
892
893 let Affiliated { caption, name } = mem::take(pending);
894 let caption = Caption {
895 short: None,
896 long: caption.map(|c| vec![Block::Plain(c)]).unwrap_or_default(),
897 };
898
899 let table = Table {
900 attr: Attr {
901 id: name.unwrap_or_default().into(),
902 ..Attr::default()
903 },
904 caption,
905 col_specs,
906 head: TableHead {
907 attr: Attr::default(),
908 rows: to_rows(&head_rows),
909 },
910 bodies: vec![TableBody {
911 attr: Attr::default(),
912 row_head_columns: 0,
913 head: Vec::new(),
914 body: to_rows(&body_rows),
915 }],
916 foot: TableFoot::default(),
917 };
918 Block::Table(Box::new(table))
919}
920
921fn build_cell(text: &str, ext: Extensions, notes: &BTreeMap<String, Vec<Block>>) -> Cell {
922 let content = if text.is_empty() {
923 Vec::new()
924 } else {
925 vec![Block::Plain(parse_inlines(text, ext, notes))]
926 };
927 Cell {
928 attr: Attr::default(),
929 align: Alignment::AlignDefault,
930 row_span: 1,
931 col_span: 1,
932 content,
933 }
934}
935
936#[derive(Clone, Copy, PartialEq)]
940enum Marker {
941 Bullet,
942 Ordered(ListNumberStyle, ListNumberDelim),
943}
944
945struct MarkerInfo {
948 indent: usize,
949 content_col: usize,
950 kind: Marker,
951}
952
953fn list_marker(line: &str) -> Option<MarkerInfo> {
954 let indent = line.len() - line.trim_start().len();
955 let rest = line.get(indent..)?;
956 let bytes = rest.as_bytes();
957 if let Some(&c) = bytes.first()
959 && (matches!(c, b'-' | b'+') || (c == b'*' && indent > 0))
960 && (bytes.get(1) == Some(&b' ') || bytes.len() == 1)
961 {
962 return Some(MarkerInfo {
963 indent,
964 content_col: indent + 2,
965 kind: Marker::Bullet,
966 });
967 }
968 let mut j = 0;
970 while bytes.get(j).is_some_and(u8::is_ascii_digit) {
971 j += 1;
972 }
973 let style = if j > 0 {
974 ListNumberStyle::Decimal
975 } else if let Some(&letter) = bytes
976 .first()
977 .filter(|c| c.is_ascii_alphabetic())
978 .filter(|_| bytes.get(1).is_some_and(|&c| c == b'.' || c == b')'))
979 {
980 j = 1;
981 if letter.is_ascii_uppercase() {
982 ListNumberStyle::UpperAlpha
983 } else {
984 ListNumberStyle::LowerAlpha
985 }
986 } else {
987 return None;
988 };
989 let delim = match bytes.get(j) {
990 Some(b'.') => ListNumberDelim::Period,
991 Some(b')') => ListNumberDelim::OneParen,
992 _ => return None,
993 };
994 if bytes.get(j + 1) == Some(&b' ') || bytes.len() == j + 1 {
995 Some(MarkerInfo {
996 indent,
997 content_col: indent + j + 2,
998 kind: Marker::Ordered(style, delim),
999 })
1000 } else {
1001 None
1002 }
1003}
1004
1005fn parse_list(
1006 lines: &[&str],
1007 start: usize,
1008 ext: Extensions,
1009 notes: &BTreeMap<String, Vec<Block>>,
1010 ids: &mut IdRegistry,
1011 meta: &mut BTreeMap<Text, MetaValue>,
1012) -> (Option<Block>, usize) {
1013 let Some(first) = list_marker(lines.get(start).copied().unwrap_or("")) else {
1014 return (None, 1);
1015 };
1016 let base_indent = first.indent;
1017 let first_kind = first.kind;
1018
1019 let mut items: Vec<Vec<&str>> = Vec::new();
1020 let mut loose = false;
1021 let mut i = start;
1022 let mut pending_blank = false;
1023
1024 while let Some(&line) = lines.get(i) {
1025 if line.trim().is_empty() {
1026 pending_blank = true;
1027 i += 1;
1028 continue;
1029 }
1030 if let Some(marker) = list_marker(line)
1031 && marker.indent == base_indent
1032 && same_series(first_kind, marker.kind)
1033 {
1034 if pending_blank && !items.is_empty() {
1035 loose = true;
1036 }
1037 pending_blank = false;
1038 let content_col = marker.content_col;
1039 let mut item_lines = vec![line.get(content_col..).unwrap_or("")];
1040 i += 1;
1041 while let Some(&next) = lines.get(i) {
1043 if next.trim().is_empty() {
1044 pending_blank = true;
1045 item_lines.push("");
1046 i += 1;
1047 continue;
1048 }
1049 let next_indent = next.len() - next.trim_start().len();
1050 let is_sibling = list_marker(next).is_some_and(|m| m.indent == base_indent);
1051 if next_indent > base_indent && !is_sibling {
1052 if pending_blank {
1053 loose = true;
1054 }
1055 pending_blank = false;
1056 item_lines.push(dedent_line(next, content_col));
1057 i += 1;
1058 } else {
1059 break;
1060 }
1061 }
1062 while item_lines.last() == Some(&"") {
1064 item_lines.pop();
1065 }
1066 items.push(item_lines);
1067 continue;
1068 }
1069 break;
1070 }
1071
1072 if items.is_empty() {
1073 return (None, 1);
1074 }
1075
1076 if let Some(defs) = try_definition_list(&items, ext, notes, ids, meta, loose) {
1078 return (Some(defs), i - start);
1079 }
1080
1081 let item_blocks: Vec<Vec<Block>> = items
1082 .iter()
1083 .map(|item| {
1084 let blocks = parse_list_item(item, ext, notes, ids, meta);
1085 if loose { blocks } else { tighten(blocks) }
1086 })
1087 .collect();
1088
1089 let block = match first_kind {
1090 Marker::Bullet => Block::BulletList(item_blocks),
1091 Marker::Ordered(style, delim) => {
1092 let (style, delim) = if ext.contains(Extension::FancyLists) {
1093 (style, delim)
1094 } else {
1095 (ListNumberStyle::DefaultStyle, ListNumberDelim::DefaultDelim)
1096 };
1097 Block::OrderedList(
1098 ListAttributes {
1099 start: 1,
1100 style,
1101 delim,
1102 },
1103 item_blocks,
1104 )
1105 }
1106 };
1107 (Some(block), i - start)
1108}
1109
1110fn same_series(a: Marker, b: Marker) -> bool {
1112 matches!(
1113 (a, b),
1114 (Marker::Bullet, Marker::Bullet) | (Marker::Ordered(..), Marker::Ordered(..))
1115 )
1116}
1117
1118fn parse_list_item(
1119 item: &[&str],
1120 ext: Extensions,
1121 notes: &BTreeMap<String, Vec<Block>>,
1122 ids: &mut IdRegistry,
1123 meta: &mut BTreeMap<Text, MetaValue>,
1124) -> Vec<Block> {
1125 let mut lines = item.to_vec();
1126 let mut checkbox = None;
1127 if ext.contains(Extension::TaskLists)
1128 && let Some(first) = lines.first_mut()
1129 && let Some((glyph, rest)) = strip_checkbox(first)
1130 {
1131 checkbox = Some(glyph);
1132 *first = rest;
1133 }
1134 let mut blocks = parse_blocks(&lines, ext, notes, ids, meta);
1135 if let Some(glyph) = checkbox {
1136 prepend_checkbox(&mut blocks, glyph);
1137 }
1138 blocks
1139}
1140
/// Strips a leading checkbox from an item's first line.
///
/// `[ ]` and `[-]` map to U+2610 (ballot box), `[X]` to U+2612 (ballot box with X). The
/// checkbox must be followed by a space or end the line. Returns the glyph and the text
/// after the checkbox and its separating space.
fn strip_checkbox(line: &str) -> Option<(&'static str, &str)> {
    let glyph = match line.get(..3)? {
        "[ ]" | "[-]" => "\u{2610}",
        "[X]" => "\u{2612}",
        _ => return None,
    };
    let rest = line.get(3..)?;
    match rest.strip_prefix(' ') {
        Some(after) => Some((glyph, after)),
        None if rest.is_empty() => Some((glyph, rest)),
        None => None,
    }
}
1160
1161fn prepend_checkbox(blocks: &mut Vec<Block>, glyph: &str) {
1164 match blocks.first_mut() {
1165 Some(Block::Plain(inlines) | Block::Para(inlines)) => {
1166 inlines.splice(0..0, [Inline::Str(glyph.into()), Inline::Space]);
1167 }
1168 _ => blocks.insert(0, Block::Plain(vec![Inline::Str(glyph.into())])),
1169 }
1170}
1171
1172fn tighten(blocks: Vec<Block>) -> Vec<Block> {
1174 blocks
1175 .into_iter()
1176 .map(|b| match b {
1177 Block::Para(inlines) => Block::Plain(inlines),
1178 other => other,
1179 })
1180 .collect()
1181}
1182
1183fn try_definition_list(
1184 items: &[Vec<&str>],
1185 ext: Extensions,
1186 notes: &BTreeMap<String, Vec<Block>>,
1187 ids: &mut IdRegistry,
1188 meta: &mut BTreeMap<Text, MetaValue>,
1189 loose: bool,
1190) -> Option<Block> {
1191 let first = items.first()?;
1192 split_definition(first.first().copied().unwrap_or(""))?;
1193 let mut entries = Vec::new();
1194 for item in items {
1195 let head = item.first().copied().unwrap_or("");
1196 let (term_text, def_first) = match split_definition(head) {
1197 Some(pair) => pair,
1198 None => (head, ""),
1199 };
1200 let term = parse_inlines(term_text.trim(), ext, notes);
1201 let mut def_lines = vec![def_first];
1202 def_lines.extend(item.get(1..).unwrap_or(&[]).iter().copied());
1203 let blocks = parse_blocks(&def_lines, ext, notes, ids, meta);
1204 let blocks = if loose { blocks } else { tighten(blocks) };
1205 entries.push((term, vec![blocks]));
1206 }
1207 Some(Block::DefinitionList(entries))
1208}
1209
/// Splits a descriptive list item's first line into `(term, definition)`.
///
/// The separator is the first ` :: ` in the line. A line that ends in ` ::` is accepted
/// too and yields an empty definition, which covers items whose description starts on
/// the following line. Returns `None` when the line has no separator.
fn split_definition(line: &str) -> Option<(&str, &str)> {
    line.split_once(" :: ")
        .or_else(|| line.strip_suffix(" ::").map(|term| (term, "")))
}
1216
/// Removes up to `col` bytes of leading whitespace from `line`.
///
/// Never removes more than the line's own indentation. If the cut would fall inside a
/// multi-byte whitespace character, it is moved back to the previous character boundary,
/// so the line's content is always preserved.
fn dedent_line(line: &str, col: usize) -> &str {
    let indent = line.len() - line.trim_start().len();
    let mut cut = indent.min(col);
    // Offset 0 is always a boundary, so this terminates.
    while !line.is_char_boundary(cut) {
        cut -= 1;
    }
    line.get(cut..).unwrap_or(line)
}
1223
1224fn parse_inlines(text: &str, ext: Extensions, notes: &BTreeMap<String, Vec<Block>>) -> Vec<Inline> {
1227 let chars: Vec<char> = text.chars().collect();
1228 let mut scanner = Inlines {
1229 chars: &chars,
1230 ext,
1231 notes,
1232 out: Vec::new(),
1233 word: String::new(),
1234 };
1235 scanner.run();
1236 scanner.finish()
1237}
1238
1239struct Inlines<'a> {
1240 chars: &'a [char],
1241 ext: Extensions,
1242 notes: &'a BTreeMap<String, Vec<Block>>,
1243 out: Vec<Inline>,
1244 word: String,
1245}
1246
1247impl Inlines<'_> {
1248 fn finish(mut self) -> Vec<Inline> {
1249 self.flush();
1250 self.out
1251 }
1252
1253 fn flush(&mut self) {
1254 if !self.word.is_empty() {
1255 self.out.push(Inline::Str(mem::take(&mut self.word).into()));
1256 }
1257 }
1258
1259 fn push_inline(&mut self, inline: Inline) {
1260 self.flush();
1261 self.out.push(inline);
1262 }
1263
1264 fn at(&self, i: usize) -> Option<char> {
1265 self.chars.get(i).copied()
1266 }
1267
    /// Scans the whole input left to right, dispatching on the current character.
    ///
    /// Recognised constructs are appended to `self.out` via `push_inline`;
    /// anything else is accumulated in `self.word` until the next flush. Each
    /// `scan_*` helper either reports where scanning should resume or signals
    /// "no match", in which case the character is kept as literal text.
    #[allow(clippy::too_many_lines)]
    fn run(&mut self) {
        let mut i = 0;
        while let Some(c) = self.at(i) {
            // Character immediately before the cursor; `None` at the start.
            let prev = if i == 0 { None } else { self.at(i - 1) };

            // Bare URLs are tried before any markup dispatch, but only when
            // the preceding character is not alphanumeric.
            if is_url_boundary(prev)
                && let Some((url, end)) = self.scan_bare_url(i)
            {
                self.push_inline(link(&url, vec![Inline::Str(url.clone().into())]));
                i = end;
                continue;
            }

            match c {
                // A run of spaces/tabs collapses into a single `Space`.
                ' ' | '\t' => {
                    self.flush();
                    while matches!(self.at(i), Some(' ' | '\t')) {
                        i += 1;
                    }
                    self.out.push(Inline::Space);
                }
                '\n' => {
                    self.flush();
                    self.out.push(Inline::SoftBreak);
                    i += 1;
                }
                // Line breaks, TeX math delimiters and entities.
                '\\' => i = self.scan_backslash(i),
                // Bold, italic and strike-through: the contents are parsed
                // recursively as inlines.
                '*' | '/' | '+' => {
                    if let Some(end) = self.scan_emphasis(i, c, prev) {
                        let inner = self.chars.get(i + 1..end).unwrap_or(&[]);
                        let content = parse_inlines(&collect_str(inner), self.ext, self.notes);
                        self.push_inline(wrap_markup(c, content));
                        i = end + 1;
                    } else {
                        self.word.push(c);
                        i += 1;
                    }
                }
                // `_` is underline when it forms an emphasis pair, otherwise
                // it may introduce a subscript.
                '_' => {
                    if let Some(end) = self.scan_emphasis(i, '_', prev) {
                        let inner = self.chars.get(i + 1..end).unwrap_or(&[]);
                        let content = parse_inlines(&collect_str(inner), self.ext, self.notes);
                        self.push_inline(Inline::Underline(content));
                        i = end + 1;
                    } else if let Some((inline, end)) = self.scan_subsup(i, prev, false) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('_');
                        i += 1;
                    }
                }
                '^' => {
                    if let Some((inline, end)) = self.scan_subsup(i, prev, true) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('^');
                        i += 1;
                    }
                }
                // Verbatim and code: the contents are taken literally, not
                // parsed as inlines.
                '=' | '~' => {
                    if let Some(end) = self.scan_emphasis(i, c, prev) {
                        let inner = self.chars.get(i + 1..end).unwrap_or(&[]);
                        self.push_inline(verbatim_code(c, inner));
                        i = end + 1;
                    } else {
                        self.word.push(c);
                        i += 1;
                    }
                }
                // Links, footnotes and citations.
                '[' => {
                    if let Some((inline, end)) = self.scan_bracket(i) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('[');
                        i += 1;
                    }
                }
                // `<<targets>>` and `<uri>` links.
                '<' => {
                    if let Some((inline, end)) = self.scan_angle(i) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('<');
                        i += 1;
                    }
                }
                '$' => {
                    if let Some((inline, end)) = self.scan_math_dollar(i, prev) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('$');
                        i += 1;
                    }
                }
                // `@@format:content@@` export snippets.
                '@' => {
                    if let Some((inline, end)) = self.scan_export(i) {
                        self.push_inline(inline);
                        i = end;
                    } else {
                        self.word.push('@');
                        i += 1;
                    }
                }
                // Dashes and ellipsis are replaced in place and stay part of
                // the current word.
                '-' | '.' => {
                    if let Some((text, end)) = self.scan_special_string(i) {
                        self.word.push_str(text);
                        i = end;
                    } else {
                        self.word.push(c);
                        i += 1;
                    }
                }
                // With `Smart`, a `'` right after an alphanumeric character is
                // an apostrophe. This arm must precede the quote arm below.
                '\'' if self.ext.contains(Extension::Smart)
                    && prev.is_some_and(char::is_alphanumeric) =>
                {
                    self.word.push('\u{2019}');
                    i += 1;
                }
                // With `Smart`, try to pair the quote; an unpaired quote stays
                // literal.
                '"' | '\'' if self.ext.contains(Extension::Smart) => {
                    let (inline, end) = self.scan_quote(i, c);
                    if let Some(q) = inline {
                        self.push_inline(q);
                        i = end;
                    } else {
                        self.word.push(c);
                        i += 1;
                    }
                }
                _ => {
                    self.word.push(c);
                    i += 1;
                }
            }
        }
    }
1413
1414 fn scan_emphasis(&self, i: usize, marker: char, prev: Option<char>) -> Option<usize> {
1419 if !pre_ok(prev) {
1420 return None;
1421 }
1422 let first = self.at(i + 1)?;
1423 if first.is_whitespace() {
1424 return None;
1425 }
1426 let mut newlines = 0;
1427 let mut j = i + 1;
1428 while let Some(c) = self.at(j) {
1429 if c == '\n' {
1430 newlines += 1;
1431 if newlines > 1 {
1432 return None;
1433 }
1434 }
1435 if c == marker
1436 && j > i + 1
1437 && !self.at(j - 1).is_some_and(char::is_whitespace)
1438 && post_ok(self.at(j + 1))
1439 {
1440 return Some(j);
1441 }
1442 j += 1;
1443 }
1444 None
1445 }
1446
1447 fn scan_subsup(&self, i: usize, prev: Option<char>, sup: bool) -> Option<(Inline, usize)> {
1452 if prev.is_none_or(|c| c.is_whitespace() || c == '_') {
1455 return None;
1456 }
1457 let content;
1458 let end;
1459 if self.at(i + 1) == Some('{') {
1460 let close = self.match_brace(i + 1)?;
1461 let inner = self.chars.get(i + 2..close).unwrap_or(&[]);
1462 content = parse_inlines(&collect_str(inner), self.ext, self.notes);
1463 end = close + 1;
1464 } else {
1465 let (text, stop) = self.scan_bare_script(i + 1)?;
1466 content = vec![Inline::Str(text.into())];
1467 end = stop;
1468 }
1469 let inline = if sup {
1470 Inline::Superscript(content)
1471 } else {
1472 Inline::Subscript(content)
1473 };
1474 Some((inline, end))
1475 }
1476
1477 fn scan_bare_script(&self, start: usize) -> Option<(String, usize)> {
1480 let mut j = start;
1481 if matches!(self.at(j), Some('-' | '+')) {
1482 j += 1;
1483 }
1484 let body_start = j;
1485 while matches!(self.at(j), Some(c) if c.is_alphanumeric() || matches!(c, '.' | ',' | '\\'))
1486 {
1487 j += 1;
1488 }
1489 let mut last = j;
1491 while last > body_start && !self.at(last - 1).is_some_and(char::is_alphanumeric) {
1492 last -= 1;
1493 }
1494 if last <= body_start {
1495 return None;
1496 }
1497 let text: String = self.chars.get(start..last).unwrap_or(&[]).iter().collect();
1498 Some((text, last))
1499 }
1500
1501 fn match_brace(&self, open: usize) -> Option<usize> {
1502 let mut depth = 0usize;
1503 let mut j = open;
1504 while let Some(c) = self.at(j) {
1505 match c {
1506 '{' => depth += 1,
1507 '}' => {
1508 depth -= 1;
1509 if depth == 0 {
1510 return Some(j);
1511 }
1512 }
1513 '\n' => return None,
1514 _ => {}
1515 }
1516 j += 1;
1517 }
1518 None
1519 }
1520
1521 fn scan_backslash(&mut self, i: usize) -> usize {
1524 match self.at(i + 1) {
1525 Some('\\') => {
1526 self.push_inline(Inline::LineBreak);
1528 let mut j = i + 2;
1529 while matches!(self.at(j), Some(' ' | '\t')) {
1530 j += 1;
1531 }
1532 if self.at(j) == Some('\n') {
1533 j += 1;
1534 }
1535 j
1536 }
1537 Some('(') => self.scan_tex_math(i + 2, "\\)", MathType::InlineMath, i),
1538 Some('[') => self.scan_tex_math(i + 2, "\\]", MathType::DisplayMath, i),
1539 Some(c) if c.is_ascii_alphabetic() => self.scan_entity(i),
1540 _ => {
1541 self.word.push('\\');
1542 i + 1
1543 }
1544 }
1545 }
1546
1547 fn scan_tex_math(
1548 &mut self,
1549 start: usize,
1550 close: &str,
1551 kind: MathType,
1552 fallback: usize,
1553 ) -> usize {
1554 let closing: Vec<char> = close.chars().collect();
1555 let mut j = start;
1556 while j < self.chars.len() {
1557 if self.matches_at(j, &closing) {
1558 let inner: String = self.chars.get(start..j).unwrap_or(&[]).iter().collect();
1559 self.push_inline(Inline::Math(kind, inner.into()));
1560 return j + closing.len();
1561 }
1562 j += 1;
1563 }
1564 self.word.push('\\');
1566 fallback + 1
1567 }
1568
1569 fn scan_entity(&mut self, i: usize) -> usize {
1570 let mut j = i + 1;
1571 while matches!(self.at(j), Some(c) if c.is_ascii_alphabetic()) {
1572 j += 1;
1573 }
1574 let name: String = self.chars.get(i + 1..j).unwrap_or(&[]).iter().collect();
1575 let mut end = j;
1577 if self.at(j) == Some('{') && self.at(j + 1) == Some('}') {
1578 end = j + 2;
1579 }
1580 if let Some(replacement) = entity(&name) {
1581 self.word.push_str(replacement);
1582 } else {
1583 self.push_inline(Inline::RawInline(
1584 Format("latex".into()),
1585 format!("\\{name}").into(),
1586 ));
1587 }
1588 end
1589 }
1590
1591 fn scan_bracket(&self, i: usize) -> Option<(Inline, usize)> {
1594 if self.at(i + 1) == Some('[') {
1595 return self.scan_link(i);
1596 }
1597 if self.matches_at(i + 1, &['f', 'n', ':']) {
1598 return self.scan_footnote(i);
1599 }
1600 if self.ext.contains(Extension::Citations)
1601 && (self.matches_at(i + 1, &['c', 'i', 't', 'e', ':'])
1602 || self.matches_at(i + 1, &['c', 'i', 't', 'e', '/']))
1603 {
1604 return self.scan_citation(i);
1605 }
1606 None
1607 }
1608
1609 fn scan_link(&self, i: usize) -> Option<(Inline, usize)> {
1610 let inner_start = i + 2;
1612 let close = self.find_double_close(inner_start)?;
1613 let inner: String = self
1614 .chars
1615 .get(inner_start..close)
1616 .unwrap_or(&[])
1617 .iter()
1618 .collect();
1619 let (target_raw, desc_raw) = match inner.find("][") {
1620 Some(idx) => (
1621 inner.get(..idx).unwrap_or(""),
1622 Some(inner.get(idx + 2..).unwrap_or("")),
1623 ),
1624 None => (inner.as_str(), None),
1625 };
1626 let target = process_target(target_raw);
1627 let end = close + 2;
1628 match desc_raw {
1629 Some(desc) => Some((
1630 link(&target, parse_inlines(desc, self.ext, self.notes)),
1631 end,
1632 )),
1633 None => {
1634 if is_image_target(&target) {
1635 Some((image(&target, Vec::new()), end))
1636 } else {
1637 Some((link(&target, vec![Inline::Str(target_raw.into())]), end))
1638 }
1639 }
1640 }
1641 }
1642
1643 fn find_double_close(&self, from: usize) -> Option<usize> {
1645 let mut j = from;
1646 while j + 1 < self.chars.len() {
1647 if self.at(j) == Some(']') && self.at(j + 1) == Some(']') {
1648 return Some(j);
1649 }
1650 j += 1;
1651 }
1652 None
1653 }
1654
1655 fn scan_footnote(&self, i: usize) -> Option<(Inline, usize)> {
1656 let close = self.match_bracket(i)?;
1658 let inner: String = self.chars.get(i + 1..close).unwrap_or(&[]).iter().collect();
1659 let body = inner.strip_prefix("fn:")?;
1660 let end = close + 1;
1661 if let Some((label, text)) = body.split_once(':') {
1662 let note = vec![Block::Para(parse_inlines(
1664 text.trim(),
1665 self.ext,
1666 self.notes,
1667 ))];
1668 let _ = label;
1669 return Some((Inline::Note(note), end));
1670 }
1671 let blocks = self.notes.get(body).cloned().unwrap_or_default();
1673 Some((Inline::Note(blocks), end))
1674 }
1675
1676 fn scan_citation(&self, i: usize) -> Option<(Inline, usize)> {
1677 let close = self.match_bracket(i)?;
1678 let inner: String = self.chars.get(i + 1..close).unwrap_or(&[]).iter().collect();
1679 let raw: String = self.chars.get(i..close + 1).unwrap_or(&[]).iter().collect();
1680 let rest = inner.strip_prefix("cite")?;
1681 let (style, payload) = match rest.strip_prefix('/') {
1682 Some(after) => {
1683 let (sty, pay) = after.split_once(':')?;
1684 (Some(sty), pay)
1685 }
1686 None => (None, rest.strip_prefix(':')?),
1687 };
1688 let citations = parse_citation_items(payload, style, self.ext, self.notes)?;
1689 Some((Inline::Cite(citations, plain_words(&raw)), close + 1))
1690 }
1691
1692 fn match_bracket(&self, open: usize) -> Option<usize> {
1693 let mut depth = 0usize;
1694 let mut j = open;
1695 while let Some(c) = self.at(j) {
1696 match c {
1697 '[' => depth += 1,
1698 ']' => {
1699 depth -= 1;
1700 if depth == 0 {
1701 return Some(j);
1702 }
1703 }
1704 _ => {}
1705 }
1706 j += 1;
1707 }
1708 None
1709 }
1710
1711 fn scan_angle(&self, i: usize) -> Option<(Inline, usize)> {
1714 if self.at(i + 1) == Some('<') {
1715 let name_start = i + 2;
1717 let mut j = name_start;
1718 while matches!(self.at(j), Some(c) if c != '<' && c != '>' && c != '\n') {
1719 j += 1;
1720 }
1721 if j > name_start && self.at(j) == Some('>') && self.at(j + 1) == Some('>') {
1722 let name: String = self
1723 .chars
1724 .get(name_start..j)
1725 .unwrap_or(&[])
1726 .iter()
1727 .collect();
1728 let attr = Attr {
1729 id: name.into(),
1730 ..Attr::default()
1731 };
1732 let mut end = j + 2;
1734 while matches!(self.at(end), Some(' ' | '\t')) {
1735 end += 1;
1736 }
1737 return Some((Inline::Span(Box::new(attr), Vec::new()), end));
1738 }
1739 return None;
1740 }
1741 let mut j = i + 1;
1743 while matches!(self.at(j), Some(c) if c != '>' && c != '\n') {
1744 j += 1;
1745 }
1746 if self.at(j) != Some('>') {
1747 return None;
1748 }
1749 let content: String = self.chars.get(i + 1..j).unwrap_or(&[]).iter().collect();
1750 if is_uri(&content) {
1751 return Some((
1752 link(&content, vec![Inline::Str(content.clone().into())]),
1753 j + 1,
1754 ));
1755 }
1756 None
1757 }
1758
1759 fn scan_math_dollar(&self, i: usize, prev: Option<char>) -> Option<(Inline, usize)> {
1762 if self.at(i + 1) == Some('$') {
1763 let start = i + 2;
1765 let mut j = start;
1766 while j + 1 < self.chars.len() {
1767 if self.at(j) == Some('$') && self.at(j + 1) == Some('$') {
1768 let inner: String = self.chars.get(start..j).unwrap_or(&[]).iter().collect();
1769 return Some((Inline::Math(MathType::DisplayMath, inner.into()), j + 2));
1770 }
1771 j += 1;
1772 }
1773 return None;
1774 }
1775 if prev.is_some_and(|c| c.is_alphanumeric() || c == '$') {
1777 return None;
1778 }
1779 let first = self.at(i + 1)?;
1780 if first.is_whitespace() || first == '$' {
1781 return None;
1782 }
1783 let mut j = i + 1;
1784 while let Some(c) = self.at(j) {
1785 if c == '\n' {
1786 return None;
1787 }
1788 if c == '$'
1789 && !self.at(j - 1).is_some_and(char::is_whitespace)
1790 && !self.at(j + 1).is_some_and(char::is_alphanumeric)
1791 {
1792 let inner: String = self.chars.get(i + 1..j).unwrap_or(&[]).iter().collect();
1793 return Some((Inline::Math(MathType::InlineMath, inner.into()), j + 1));
1794 }
1795 j += 1;
1796 }
1797 None
1798 }
1799
1800 fn scan_export(&self, i: usize) -> Option<(Inline, usize)> {
1803 if self.at(i + 1) != Some('@') {
1805 return None;
1806 }
1807 let fmt_start = i + 2;
1808 let mut j = fmt_start;
1809 while matches!(self.at(j), Some(c) if c.is_ascii_alphanumeric() || c == '-') {
1810 j += 1;
1811 }
1812 if self.at(j) != Some(':') || j == fmt_start {
1813 return None;
1814 }
1815 let fmt: String = self.chars.get(fmt_start..j).unwrap_or(&[]).iter().collect();
1816 let content_start = j + 1;
1817 let mut k = content_start;
1818 while k + 1 < self.chars.len() {
1819 if self.at(k) == Some('@') && self.at(k + 1) == Some('@') {
1820 let content: String = self
1821 .chars
1822 .get(content_start..k)
1823 .unwrap_or(&[])
1824 .iter()
1825 .collect();
1826 return Some((Inline::RawInline(Format(fmt.into()), content.into()), k + 2));
1827 }
1828 k += 1;
1829 }
1830 None
1831 }
1832
1833 fn scan_quote(&self, i: usize, quote: char) -> (Option<Inline>, usize) {
1836 let (kind, close) = if quote == '"' {
1837 (QuoteType::DoubleQuote, '"')
1838 } else {
1839 (QuoteType::SingleQuote, '\'')
1840 };
1841 if !matches!(self.at(i + 1), Some(c) if !c.is_whitespace()) {
1843 return (None, i + 1);
1844 }
1845 let mut j = i + 1;
1846 while let Some(c) = self.at(j) {
1847 if c == close
1848 && !self.at(j - 1).is_some_and(char::is_whitespace)
1849 && post_ok(self.at(j + 1))
1850 {
1851 let inner = self.chars.get(i + 1..j).unwrap_or(&[]);
1852 let content = parse_inlines(&collect_str(inner), self.ext, self.notes);
1853 return (Some(Inline::Quoted(kind, content)), j + 1);
1854 }
1855 if c == '\n' {
1856 break;
1857 }
1858 j += 1;
1859 }
1860 (None, i + 1)
1861 }
1862
1863 fn scan_special_string(&self, i: usize) -> Option<(&'static str, usize)> {
1867 if self.at(i) == Some('.') {
1868 if self.at(i + 1) == Some('.') && self.at(i + 2) == Some('.') {
1869 return Some(("\u{2026}", i + 3));
1870 }
1871 return None;
1872 }
1873 if self.at(i + 1) == Some('-') {
1875 if self.at(i + 2) == Some('-') {
1876 return Some(("\u{2014}", i + 3));
1877 }
1878 return Some(("\u{2013}", i + 2));
1879 }
1880 None
1881 }
1882
1883 fn scan_bare_url(&self, i: usize) -> Option<(String, usize)> {
1886 const SCHEMES: [&str; 3] = ["https://", "http://", "ftp://"];
1887 let scheme = SCHEMES.iter().find(|s| self.matches_str(i, s)).copied()?;
1888 let mut j = i + scheme.chars().count();
1889 while matches!(self.at(j), Some(c) if !c.is_whitespace() && !matches!(c, '<' | '>' | '(' | ')' | '[' | ']'))
1890 {
1891 j += 1;
1892 }
1893 while j > i + scheme.chars().count()
1895 && self
1896 .at(j - 1)
1897 .is_some_and(|c| matches!(c, '.' | ',' | ';' | ':' | '!' | '?' | '\'' | '"'))
1898 {
1899 j -= 1;
1900 }
1901 let url: String = self.chars.get(i..j).unwrap_or(&[]).iter().collect();
1902 Some((url, j))
1903 }
1904
1905 fn matches_at(&self, i: usize, pat: &[char]) -> bool {
1908 pat.iter()
1909 .enumerate()
1910 .all(|(k, &c)| self.at(i + k) == Some(c))
1911 }
1912
1913 fn matches_str(&self, i: usize, pat: &str) -> bool {
1914 pat.chars()
1915 .enumerate()
1916 .all(|(k, c)| self.at(i + k) == Some(c))
1917 }
1918}
1919
/// Builds an owned `String` from a slice of characters.
fn collect_str(chars: &[char]) -> String {
    let mut text = String::with_capacity(chars.len());
    text.extend(chars);
    text
}
1925
1926fn plain_words(text: &str) -> Vec<Inline> {
1929 let mut out = Vec::new();
1930 for word in text.split_whitespace() {
1931 if !out.is_empty() {
1932 out.push(Inline::Space);
1933 }
1934 out.push(Inline::Str(word.into()));
1935 }
1936 out
1937}
1938
1939fn wrap_markup(marker: char, content: Vec<Inline>) -> Inline {
1940 match marker {
1941 '*' => Inline::Strong(content),
1942 '+' => Inline::Strikeout(content),
1943 _ => Inline::Emph(content),
1945 }
1946}
1947
1948fn verbatim_code(marker: char, inner: &[char]) -> Inline {
1949 let text: String = inner
1951 .iter()
1952 .map(|&c| if c == '\n' { ' ' } else { c })
1953 .collect();
1954 let attr = if marker == '=' {
1955 Attr {
1956 classes: vec!["verbatim".into()],
1957 ..Attr::default()
1958 }
1959 } else {
1960 Attr::default()
1961 };
1962 Inline::Code(Box::new(attr), text.into())
1963}
1964
1965fn link(target: &str, desc: Vec<Inline>) -> Inline {
1966 Inline::Link(
1967 Box::default(),
1968 desc,
1969 Box::new(carta_ast::Target {
1970 url: target.into(),
1971 title: carta_ast::Text::default(),
1972 }),
1973 )
1974}
1975
1976fn image(target: &str, alt: Vec<Inline>) -> Inline {
1977 Inline::Image(
1978 Box::default(),
1979 alt,
1980 Box::new(carta_ast::Target {
1981 url: target.into(),
1982 title: carta_ast::Text::default(),
1983 }),
1984 )
1985}
1986
/// Normalises a link target by dropping a leading `file:` scheme, if present.
fn process_target(raw: &str) -> String {
    raw.strip_prefix("file:").unwrap_or(raw).to_owned()
}
1994
/// Reports whether `target` ends in a known image file extension, compared
/// without regard to ASCII case.
fn is_image_target(target: &str) -> bool {
    const EXTS: [&str; 8] = [
        ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp", ".tiff",
    ];
    let bytes = target.as_bytes();
    EXTS.iter().any(|ext| {
        bytes
            .len()
            .checked_sub(ext.len())
            .and_then(|cut| bytes.get(cut..))
            .is_some_and(|tail| tail.eq_ignore_ascii_case(ext.as_bytes()))
    })
}
2002
/// Reports whether `s` looks like a URI.
///
/// Text containing whitespace never qualifies. Otherwise it qualifies when it
/// contains `://`, or when it has the shape `scheme:rest` with both parts
/// non-empty and a scheme made of ASCII alphanumerics, `+`, `.` or `-`.
fn is_uri(s: &str) -> bool {
    if s.chars().any(char::is_whitespace) {
        return false;
    }
    if s.contains("://") {
        return true;
    }
    let Some((scheme, rest)) = s.split_once(':') else {
        return false;
    };
    if scheme.is_empty() || rest.is_empty() {
        return false;
    }
    scheme
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '.' | '-'))
}
2022
/// Reports whether a bare URL may start after `prev`: at the start of input,
/// or after any character that is not alphanumeric.
fn is_url_boundary(prev: Option<char>) -> bool {
    !matches!(prev, Some(c) if c.is_alphanumeric())
}
2029
/// Reports whether an emphasis opener may follow `prev`: at the start of
/// input, after whitespace, or after one of `-`, `(`, `{`, `'`, `"`.
fn pre_ok(prev: Option<char>) -> bool {
    let Some(c) = prev else {
        return true;
    };
    c.is_whitespace() || "-({'\"".contains(c)
}
2036
/// Reports whether an emphasis (or quote) closer may be followed by `next`:
/// the end of input, whitespace, or one of `- . , : ! ? ; " ' ) } [`.
fn post_ok(next: Option<char>) -> bool {
    let Some(c) = next else {
        return true;
    };
    c.is_whitespace() || "-.,:!?;\"')}[".contains(c)
}
2049
2050fn parse_citation_items(
2053 payload: &str,
2054 style: Option<&str>,
2055 ext: Extensions,
2056 notes: &BTreeMap<String, Vec<Block>>,
2057) -> Option<Vec<carta_ast::Citation>> {
2058 let mode = citation_mode(style);
2059 let chunks: Vec<&str> = payload.split(';').collect();
2060
2061 let mut prefix_carry: Option<&str> = None;
2062 let mut items: Vec<(String, Vec<Inline>, Vec<Inline>)> = Vec::new();
2063 let mut trailing_suffix: Option<&str> = None;
2064
2065 for chunk in chunks {
2066 match chunk.find('@') {
2067 Some(at) => {
2068 let prefix = chunk.get(..at).unwrap_or("");
2069 let after = chunk.get(at + 1..).unwrap_or("");
2070 let key_end = after
2071 .find(|c: char| !is_citation_key_char(c))
2072 .unwrap_or(after.len());
2073 let key = after.get(..key_end).unwrap_or("").to_owned();
2074 let suffix = after.get(key_end..).unwrap_or("");
2075 let mut prefix_text = prefix.to_owned();
2076 if let Some(carry) = prefix_carry.take() {
2077 prefix_text = format!("{carry};{prefix}");
2078 }
2079 items.push((
2080 key,
2081 parse_inlines(prefix_text.trim(), ext, notes),
2082 parse_inlines(suffix.trim_end(), ext, notes),
2083 ));
2084 }
2085 None => {
2086 if items.is_empty() {
2087 prefix_carry = Some(chunk);
2088 } else {
2089 trailing_suffix = Some(chunk);
2090 }
2091 }
2092 }
2093 }
2094
2095 if items.is_empty() {
2096 return None;
2097 }
2098
2099 if let (Some(suffix), Some(last)) = (trailing_suffix, items.last_mut()) {
2100 let mut combined = last.2.clone();
2101 if !combined.is_empty() {
2102 combined.push(Inline::Str(";".into()));
2103 }
2104 combined.extend(parse_inlines(suffix.trim(), ext, notes));
2105 last.2 = combined;
2106 }
2107
2108 let citations = items
2109 .into_iter()
2110 .enumerate()
2111 .map(|(idx, (id, prefix, suffix))| carta_ast::Citation {
2112 id: id.into(),
2113 prefix,
2114 suffix,
2115 mode: if idx == 0 {
2116 mode.clone()
2117 } else {
2118 carta_ast::CitationMode::NormalCitation
2119 },
2120 note_num: 0,
2121 hash: 0,
2122 })
2123 .collect();
2124 Some(citations)
2125}
2126
2127fn citation_mode(style: Option<&str>) -> carta_ast::CitationMode {
2128 match style {
2129 Some("t" | "text" | "author") => carta_ast::CitationMode::AuthorInText,
2130 Some(s)
2131 if s.starts_with("na") || s.starts_with("noauthor") || s.starts_with("suppress") =>
2132 {
2133 carta_ast::CitationMode::SuppressAuthor
2134 }
2135 _ => carta_ast::CitationMode::NormalCitation,
2136 }
2137}
2138
/// Reports whether `c` may appear in a citation key: an ASCII alphanumeric or
/// one of `_`, `-`, `:`, `.`, `/`.
fn is_citation_key_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || "_-:./".contains(c)
}
2142
/// Strips `prefix` from the start of `s`, comparing without regard to ASCII
/// case, and returns the remainder.
///
/// Returns `None` when `s` is shorter than `prefix`, when the cut would fall
/// inside a multi-byte character, or when the text does not match.
fn strip_prefix_ci<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();
    let head = s.get(..cut)?;
    if head.eq_ignore_ascii_case(prefix) {
        s.get(cut..)
    } else {
        None
    }
}
2152
2153fn starts_with_ci(s: &str, prefix: &str) -> bool {
2154 strip_prefix_ci(s, prefix).is_some()
2155}
2156
/// Looks up the replacement text for a named entity (the name is given
/// without its leading backslash). Names are case-sensitive; unknown names
/// return `None`.
// One flat table is easier to audit than a split one, and a few names
// deliberately share a replacement (for example `pm` and `plusmn`).
#[allow(clippy::too_many_lines, clippy::match_same_arms)]
fn entity(name: &str) -> Option<&'static str> {
    Some(match name {
        // Greek, lower case
        "alpha" => "α",
        "beta" => "β",
        "gamma" => "γ",
        "delta" => "δ",
        "epsilon" => "ϵ",
        "zeta" => "ζ",
        "eta" => "η",
        "theta" => "θ",
        "iota" => "ι",
        "kappa" => "κ",
        "lambda" => "λ",
        "mu" => "μ",
        "nu" => "ν",
        "xi" => "ξ",
        "omicron" => "ο",
        "pi" => "π",
        "rho" => "ρ",
        "sigma" => "σ",
        "sigmaf" => "ς",
        "tau" => "τ",
        "upsilon" => "υ",
        "phi" => "φ",
        "chi" => "χ",
        "psi" => "ψ",
        "omega" => "ω",
        "varphi" => "ϕ",
        "vartheta" => "ϑ",
        "varpi" => "ϖ",
        // Greek, upper case
        "Alpha" => "Α",
        "Beta" => "Β",
        "Gamma" => "Γ",
        "Delta" => "Δ",
        "Epsilon" => "Ε",
        "Zeta" => "Ζ",
        "Eta" => "Η",
        "Theta" => "Θ",
        "Iota" => "Ι",
        "Kappa" => "Κ",
        "Lambda" => "Λ",
        "Mu" => "Μ",
        "Nu" => "Ν",
        "Xi" => "Ξ",
        "Omicron" => "Ο",
        "Pi" => "Π",
        "Rho" => "Ρ",
        "Sigma" => "Σ",
        "Tau" => "Τ",
        "Upsilon" => "Υ",
        "Phi" => "Φ",
        "Chi" => "Χ",
        "Psi" => "Ψ",
        "Omega" => "Ω",
        // Mathematical symbols
        "pm" => "±",
        "mp" => "∓",
        "times" => "×",
        "div" => "÷",
        "cdot" => "ċ",
        "deg" => "°",
        "prime" => "′",
        "Prime" => "″",
        "infin" => "∞",
        "nabla" => "∇",
        "partial" => "∂",
        "forall" => "∀",
        "exist" => "∃",
        "empty" => "∅",
        "isin" => "∈",
        "notin" => "∉",
        "ni" => "∋",
        "sum" => "∑",
        "prod" => "∏",
        "minus" => "−",
        "lowast" => "∗",
        "radic" => "√",
        "prop" => "∝",
        "ang" => "∠",
        "or" => "∨",
        "cap" => "∩",
        "cup" => "∪",
        "int" => "∫",
        "there4" => "∴",
        "sim" => "∼",
        "cong" => "≅",
        "asymp" => "≈",
        "ne" => "≠",
        "equiv" => "≡",
        "le" => "≤",
        "ge" => "≥",
        "sub" => "⊂",
        "sup" => "⊃",
        "sube" => "⊆",
        "supe" => "⊇",
        "oplus" => "⊕",
        "otimes" => "⊗",
        "perp" => "⊥",
        "sdot" => "⋅",
        // Arrows
        "larr" => "←",
        "rarr" => "→",
        "uarr" => "↑",
        "darr" => "↓",
        "harr" => "↔",
        "lArr" => "⇐",
        "rArr" => "⇒",
        "uArr" => "⇑",
        "dArr" => "⇓",
        "hArr" => "⇔",
        "Leftarrow" => "⇐",
        "Rightarrow" => "⇒",
        "Leftrightarrow" => "⇔",
        // Currency, marks and punctuation
        "copy" => "©",
        "reg" => "®",
        "trade" => "™",
        "euro" => "€",
        "cent" => "¢",
        "pound" => "£",
        "yen" => "¥",
        "sect" => "§",
        "para" => "¶",
        "middot" => "·",
        "hellip" => "…",
        "dots" => "…",
        "amp" => "&",
        "lt" => "<",
        "gt" => ">",
        "ndash" => "–",
        "mdash" => "—",
        "lsquo" => "‘",
        "rsquo" => "’",
        "ldquo" => "“",
        "rdquo" => "”",
        "laquo" => "«",
        "raquo" => "»",
        "nbsp" => "\u{a0}",
        "shy" => "\u{ad}",
        // Accented Latin letters
        "aacute" => "á",
        "eacute" => "é",
        "iacute" => "í",
        "oacute" => "ó",
        "uacute" => "ú",
        "auml" => "ä",
        "euml" => "ë",
        "iuml" => "ï",
        "ouml" => "ö",
        "uuml" => "ü",
        "ntilde" => "ñ",
        "ccedil" => "ç",
        "szlig" => "ß",
        // Miscellaneous
        "dagger" => "†",
        "Dagger" => "‡",
        "bull" => "•",
        "permil" => "‰",
        "frac12" => "½",
        "frac14" => "¼",
        "frac34" => "¾",
        "sup2" => "²",
        "sup3" => "³",
        "plusmn" => "±",
        _ => return None,
    })
}
2325
// Unit tests for the Org reader, driven through the public `Reader::read`
// entry point.
#[cfg(test)]
mod tests {
    // Tests index into parsed output directly; a failed index is a test failure.
    #![allow(clippy::indexing_slicing)]
    use super::*;

    /// Parses `input` with the `AutoIdentifiers`, `Citations` and `TaskLists`
    /// extensions enabled.
    fn doc(input: &str) -> Document {
        let mut options = ReaderOptions::default();
        options.extensions = Extensions::from_list(&[
            Extension::AutoIdentifiers,
            Extension::Citations,
            Extension::TaskLists,
        ]);
        OrgReader.read(input, &options).unwrap()
    }

    /// Parses `input` with `doc` and returns only the top-level blocks.
    fn blocks(input: &str) -> Vec<Block> {
        doc(input).blocks
    }

    // Bold and italic markers produce `Strong` and `Emph` inlines.
    #[test]
    fn paragraph_with_emphasis() {
        let b = blocks("Hello *world* /italic/ =verb= ~code~ +strike+.");
        assert_eq!(b.len(), 1);
        match &b[0] {
            Block::Para(inlines) => {
                assert!(inlines.contains(&Inline::Strong(vec![Inline::Str("world".into())])));
                assert!(inlines.contains(&Inline::Emph(vec![Inline::Str("italic".into())])));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    // The number of stars sets the header level; ids derive from the title.
    #[test]
    fn headline_levels_and_ids() {
        let b = blocks("* First\n** Second");
        match &b[0] {
            Block::Header(1, attr, _) => assert_eq!(attr.id, "first"),
            other => panic!("expected header, got {other:?}"),
        }
        match &b[1] {
            Block::Header(2, attr, _) => assert_eq!(attr.id, "second"),
            other => panic!("expected header, got {other:?}"),
        }
    }

    // The TODO keyword becomes a leading span and is excluded from the id.
    #[test]
    fn todo_keyword_and_tags() {
        let b = blocks("* TODO Task :work:");
        match &b[0] {
            Block::Header(1, attr, inlines) => {
                assert_eq!(attr.id, "task");
                assert!(
                    matches!(inlines.first(), Some(Inline::Span(a, _)) if a.classes == ["todo", "TODO"])
                );
            }
            other => panic!("expected header, got {other:?}"),
        }
    }

    // A source block keeps its language as a class and its body verbatim.
    #[test]
    fn src_block_becomes_code_block() {
        let b = blocks("#+BEGIN_SRC python\nprint(1)\n#+END_SRC");
        match &b[0] {
            Block::CodeBlock(attr, text) => {
                assert_eq!(attr.classes, ["python"]);
                assert_eq!(text, "print(1)\n");
            }
            other => panic!("expected code block, got {other:?}"),
        }
    }

    // `-` items form a bullet list, `1.` items an ordered list.
    #[test]
    fn bullet_and_ordered_lists() {
        assert!(
            matches!(blocks("- a\n- b").first(), Some(Block::BulletList(items)) if items.len() == 2)
        );
        assert!(matches!(
            blocks("1. a\n2. b").first(),
            Some(Block::OrderedList(..))
        ));
    }

    // `term :: definition` inside a list item yields a definition list.
    #[test]
    fn definition_list() {
        match blocks("- term :: definition").first() {
            Some(Block::DefinitionList(entries)) => assert_eq!(entries.len(), 1),
            other => panic!("expected definition list, got {other:?}"),
        }
    }

    // A described link stays a link; an undescribed image path becomes an image.
    #[test]
    fn link_and_image() {
        let b = blocks("[[https://example.com][label]] [[./x.png]]");
        match &b[0] {
            Block::Para(inlines) => {
                assert!(inlines.iter().any(|i| matches!(i, Inline::Link(..))));
                assert!(inlines.iter().any(|i| matches!(i, Inline::Image(..))));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    // A footnote reference in running text produces a `Note` inline.
    #[test]
    fn footnote_reference_resolves() {
        let b = blocks("Text[fn:1] more.\n\n[fn:1] The note.");
        match &b[0] {
            Block::Para(inlines) => {
                assert!(inlines.iter().any(|i| matches!(i, Inline::Note(_))));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    // A rule line separates the header row from the single body.
    #[test]
    fn table_with_header() {
        match blocks("| a | b |\n|---+---|\n| 1 | 2 |").first() {
            Some(Block::Table(table)) => {
                assert_eq!(table.head.rows.len(), 1);
                assert_eq!(table.bodies.len(), 1);
            }
            other => panic!("expected table, got {other:?}"),
        }
    }

    // `#+TITLE:` is stored in the metadata under the lower-case key.
    #[test]
    fn metadata_title() {
        let d = doc("#+TITLE: My Doc\n\nbody");
        assert!(d.meta.contains_key("title"));
    }

    // Bare `_2` and `^2` after a word character become sub/superscripts.
    #[test]
    fn subscript_and_superscript() {
        let b = blocks("H_2O and x^2");
        match &b[0] {
            Block::Para(inlines) => {
                assert!(inlines.iter().any(|i| matches!(i, Inline::Subscript(_))));
                assert!(inlines.iter().any(|i| matches!(i, Inline::Superscript(_))));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    // `---`, `--` and `...` become em dash, en dash and ellipsis.
    #[test]
    fn special_strings_dashes() {
        let b = blocks("em --- en -- dots ...");
        match &b[0] {
            Block::Para(inlines) => {
                let text = carta_ast::to_plain_text(inlines);
                assert!(text.contains('\u{2014}'));
                assert!(text.contains('\u{2013}'));
                assert!(text.contains('\u{2026}'));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    /// Parses `input` with exactly the extensions listed in `exts`.
    fn doc_with(input: &str, exts: &[Extension]) -> Document {
        let mut options = ReaderOptions::default();
        options.extensions = Extensions::from_list(exts);
        OrgReader.read(input, &options).unwrap()
    }

    // With `Smart`, paired quotes become `Quoted` and `it's` gets a curly apostrophe.
    #[test]
    fn smart_quotes_and_apostrophe() {
        let d = doc_with("He said \"hi\" and it's 'fine'.", &[Extension::Smart]);
        let Block::Para(inlines) = &d.blocks[0] else {
            panic!("expected paragraph");
        };
        assert!(inlines.contains(&Inline::Quoted(
            QuoteType::DoubleQuote,
            vec![Inline::Str("hi".into())]
        )));
        assert!(inlines.contains(&Inline::Quoted(
            QuoteType::SingleQuote,
            vec![Inline::Str("fine".into())]
        )));
        assert!(inlines.contains(&Inline::Str("it\u{2019}s".into())));
    }

    // Without `Smart`, quote characters stay literal text.
    #[test]
    fn quotes_literal_without_smart() {
        let d = doc_with("say \"hi\".", &[]);
        let Block::Para(inlines) = &d.blocks[0] else {
            panic!("expected paragraph");
        };
        assert!(inlines.iter().all(|i| !matches!(i, Inline::Quoted(..))));
    }

    // The GFM and ASCII identifier extensions change how header ids are derived.
    #[test]
    fn gfm_and_ascii_identifiers() {
        let gfm = doc_with(
            "* Foo Bar 1.2",
            &[Extension::AutoIdentifiers, Extension::GfmAutoIdentifiers],
        );
        assert!(matches!(&gfm.blocks[0], Block::Header(_, a, _) if a.id == "foo-bar-12"));

        let ascii = doc_with(
            "* Café Résumé",
            &[Extension::AutoIdentifiers, Extension::AsciiIdentifiers],
        );
        assert!(matches!(&ascii.blocks[0], Block::Header(_, a, _) if a.id == "cafe-resume"));
    }

    // Without `TaskLists`, a checkbox is kept as the literal text `[X]`.
    #[test]
    fn checkbox_literal_without_task_lists() {
        let d = doc_with("- [X] item", &[]);
        let Block::BulletList(items) = &d.blocks[0] else {
            panic!("expected bullet list");
        };
        let Block::Plain(inlines) = &items[0][0] else {
            panic!("expected plain");
        };
        assert!(inlines.contains(&Inline::Str("[X]".into())));
    }

    // Known entities are replaced by their character; unknown ones become raw inlines.
    #[test]
    fn entity_replacement() {
        let b = blocks("\\alpha and \\unknownentity");
        match &b[0] {
            Block::Para(inlines) => {
                assert!(carta_ast::to_plain_text(inlines).contains('α'));
                assert!(inlines.iter().any(|i| matches!(i, Inline::RawInline(..))));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }
}