1use std::collections::{BTreeMap, BTreeSet};
15
16use carta_ast::{
17 Alignment, Attr, Block, Caption, Cell, ColSpec, ColWidth, Document, Inline, ListAttributes,
18 ListNumberDelim, ListNumberStyle, MetaValue, Row, Table, TableBody, TableFoot, TableHead,
19 Target, slug, slug_gfm, to_plain_text,
20};
21use carta_core::{Extensions, Reader, ReaderOptions, Result};
22
23use crate::heading_ids::{IdRegistry, IdScheme, fold_to_ascii};
24use crate::inline_text::trim_inline_ends;
25
/// Table of roff string definitions, keyed by string name. It starts out as
/// the result of `predefined_strings` and grows through `.ds` requests.
type Strings = BTreeMap<String, String>;

/// Depth limit related to string handling.
// NOTE(review): not referenced in the visible part of the file; presumably it
// bounds nested string interpolation while scanning escapes — confirm.
const MAX_STRING_DEPTH: usize = 8;

/// Maximum number of lines one macro call may expand to. Expansion stops
/// adding lines once the output has reached this size.
const MAX_MACRO_EXPANSION_LINES: usize = 100_000;
36
37fn predefined_strings() -> Strings {
40 [
41 ("R", "\u{00ae}"),
42 ("Tm", "\u{2122}"),
43 ("lq", "\u{201c}"),
44 ("rq", "\u{201d}"),
45 ]
46 .into_iter()
47 .map(|(name, value)| (name.to_owned(), value.to_owned()))
48 .collect()
49}
50
51#[derive(Debug, Default, Clone, Copy)]
53pub struct ManReader;
54
55impl Reader for ManReader {
56 fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document> {
57 let lines = logical_lines(input);
58 let mut parser = Parser::new(lines, options.extensions);
59 let blocks = parser.parse_blocks(Ctx::TOP);
60 Ok(Document {
61 meta: parser
62 .meta
63 .into_iter()
64 .map(|(k, v)| (k.into(), v))
65 .collect(),
66 blocks,
67 ..Document::default()
68 })
69 }
70}
71
/// Splits `input` into logical lines.
///
/// Physical lines are separated by `\n`; one trailing `\r` per line is
/// dropped. A line ending in an odd number of backslashes is continued: the
/// last backslash is removed and the next physical line is appended to it.
/// A continuation still open at end of input is emitted as a final line.
fn logical_lines(input: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current = String::new();
    // True while `current` holds the start of a line awaiting its continuation.
    let mut open = false;
    for piece in input.split('\n') {
        current.push_str(piece.strip_suffix('\r').unwrap_or(piece));
        // An even run of backslashes is made of escaped backslashes only; an
        // odd run ends with a line-continuation backslash.
        let backslashes = current
            .bytes()
            .rev()
            .take_while(|&byte| byte == b'\\')
            .count();
        open = backslashes % 2 != 0;
        if open {
            current.pop();
        } else {
            lines.push(std::mem::take(&mut current));
        }
    }
    if open {
        lines.push(current);
    }
    lines
}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
104enum Font {
105 Regular,
106 Bold,
107 Italic,
108 BoldItalic,
109 Mono,
110 MonoBold,
111 MonoItalic,
112}
113
114impl Font {
115 fn wrap(self, inlines: Vec<Inline>) -> Vec<Inline> {
118 if inlines.is_empty() {
119 return Vec::new();
120 }
121 self.wrap_forced(inlines)
122 }
123
124 fn wrap_forced(self, inlines: Vec<Inline>) -> Vec<Inline> {
128 match self {
129 Font::Regular => inlines,
130 Font::Bold => vec![Inline::Strong(inlines)],
131 Font::Italic => vec![Inline::Emph(inlines)],
132 Font::BoldItalic => vec![Inline::Emph(vec![Inline::Strong(inlines)])],
133 Font::Mono => vec![code_inline(&inlines)],
134 Font::MonoBold => vec![Inline::Strong(vec![code_inline(&inlines)])],
135 Font::MonoItalic => vec![Inline::Emph(vec![code_inline(&inlines)])],
136 }
137 }
138}
139
140fn code_inline(inlines: &[Inline]) -> Inline {
142 let mut text = String::new();
143 collect_code_text(inlines, &mut text);
144 Inline::Code(Box::default(), text.into())
145}
146
147fn collect_code_text(inlines: &[Inline], out: &mut String) {
148 for inline in inlines {
149 match inline {
150 Inline::Str(s) => out.push_str(s),
151 Inline::Space => out.push(' '),
152 Inline::Strong(xs) | Inline::Emph(xs) => collect_code_text(xs, out),
153 _ => {}
154 }
155 }
156}
157
/// Nesting context handed to `Parser::parse_blocks`; it decides which
/// requests end the current run of blocks.
#[derive(Debug, Clone, Copy)]
struct Ctx {
    /// Parsing the body of an `.RS` inset.
    in_inset: bool,
    /// Parsing the body of a list item.
    in_item: bool,
}

impl Ctx {
    /// Top level of the document: neither in an inset nor in a list item.
    const TOP: Self = Self {
        in_inset: false,
        in_item: false,
    };
    /// Body of an `.RS` inset.
    const INSET: Self = Self {
        in_inset: true,
        in_item: false,
    };
    /// Body of a list item.
    const ITEM: Self = Self {
        in_inset: false,
        in_item: true,
    };
}
182
183struct HeadingIds {
186 scheme: Option<IdScheme>,
187 ascii: bool,
188 registry: IdRegistry,
189}
190
191impl HeadingIds {
192 fn new(extensions: Extensions) -> Self {
193 Self {
194 scheme: IdScheme::select(extensions, false),
195 ascii: extensions.contains(carta_core::Extension::AsciiIdentifiers),
196 registry: IdRegistry::default(),
197 }
198 }
199
200 fn assign(&mut self, inlines: &[Inline]) -> String {
201 let Some(scheme) = self.scheme else {
202 return String::new();
203 };
204 let text = to_plain_text(inlines);
205 let base = match scheme {
208 IdScheme::Plain => slug(&text),
209 IdScheme::Gfm => slug_gfm(&text),
210 };
211 let base = if self.ascii {
216 let folded = fold_to_ascii(&base);
217 match scheme {
218 IdScheme::Plain => folded
219 .chars()
220 .skip_while(|c| !c.is_ascii_alphabetic())
221 .collect(),
222 IdScheme::Gfm => folded,
223 }
224 } else {
225 base
226 };
227 self.registry.assign_native(base)
228 }
229}
230
/// Line-oriented parser state for one man-page document.
struct Parser {
    /// Logical input lines.
    lines: Vec<String>,
    /// Index into `lines` of the next unread line.
    pos: usize,
    /// Lines queued ahead of `lines` (macro expansions are pushed here);
    /// `peek`, `advance` and `take_line` consult this queue first.
    pending: std::collections::VecDeque<String>,
    /// Document metadata collected from `.TH`.
    meta: BTreeMap<String, MetaValue>,
    /// Heading identifier assignment.
    headings: HeadingIds,
    /// String definitions: the predefined ones plus those added by `.ds`.
    strings: Strings,
    /// Macro bodies recorded by `.de` / `.de1`, keyed by macro name.
    macros: BTreeMap<String, Vec<String>>,
    /// Set by `.ie` when its condition was false, so the next `.el` runs its
    /// branch; cleared when that `.el` is taken.
    else_branch: bool,
}
248
249impl Parser {
250 fn new(lines: Vec<String>, extensions: Extensions) -> Self {
251 Self {
252 lines,
253 pos: 0,
254 pending: std::collections::VecDeque::new(),
255 meta: BTreeMap::new(),
256 headings: HeadingIds::new(extensions),
257 strings: predefined_strings(),
258 macros: BTreeMap::new(),
259 else_branch: false,
260 }
261 }
262
263 fn peek(&self) -> Option<&str> {
264 self.pending
265 .front()
266 .map(String::as_str)
267 .or_else(|| self.lines.get(self.pos).map(String::as_str))
268 }
269
270 fn advance(&mut self) {
271 if self.pending.pop_front().is_none() {
272 self.pos += 1;
273 }
274 }
275
276 fn peek_request(&self) -> Option<&str> {
278 let line = self.peek()?;
279 if is_comment(line) {
280 return None;
281 }
282 control_parts(line).map(|(name, _)| name)
283 }
284
285 fn take_line(&mut self) -> Option<String> {
287 if let Some(line) = self.pending.pop_front() {
288 return Some(line);
289 }
290 let line = self.lines.get(self.pos).cloned();
291 if line.is_some() {
292 self.pos += 1;
293 }
294 line
295 }
296
297 fn reprocess_as(&mut self, content: &str) {
300 let content = content.trim_start_matches([' ', '\t']);
301 if content.is_empty() {
302 self.advance();
303 } else if let Some(slot) = self.pending.front_mut() {
304 content.clone_into(slot);
305 } else if let Some(slot) = self.lines.get_mut(self.pos) {
306 content.clone_into(slot);
307 } else {
308 self.advance();
309 }
310 }
311
312 fn collect_macro_definition(&mut self, end: &str) -> Vec<String> {
316 let mut body = Vec::new();
317 while let Some(line) = self.peek().map(str::to_owned) {
318 self.advance();
319 let is_end =
320 !is_comment(&line) && control_parts(&line).is_some_and(|(name, _)| name == end);
321 if is_end {
322 break;
323 }
324 body.push(reduce_copy_mode(&line));
325 }
326 body
327 }
328
329 fn expand_macro_call(&self, name: &str, args: &[String]) -> Vec<String> {
333 let mut out = Vec::new();
334 let mut active = BTreeSet::new();
335 self.expand_macro_into(name, args, &mut active, &mut out);
336 out
337 }
338
339 fn expand_macro_into(
340 &self,
341 name: &str,
342 args: &[String],
343 active: &mut BTreeSet<String>,
344 out: &mut Vec<String>,
345 ) {
346 if out.len() >= MAX_MACRO_EXPANSION_LINES || active.contains(name) {
347 return;
348 }
349 let Some(body) = self.macros.get(name) else {
350 return;
351 };
352 active.insert(name.to_owned());
353 for raw in body {
354 if out.len() >= MAX_MACRO_EXPANSION_LINES {
355 break;
356 }
357 match control_parts(raw) {
358 Some((inner, inner_rest))
359 if !is_comment(raw) && self.macros.contains_key(inner) =>
360 {
361 let inner_args = split_args(&substitute_macro_args(inner_rest, args));
363 self.expand_macro_into(inner, &inner_args, active, out);
364 }
365 Some(_) => out.push(raw.clone()),
368 None => out.push(substitute_macro_args(raw, args)),
370 }
371 }
372 active.remove(name);
373 }
374
    /// Parses a run of blocks starting at the current line.
    ///
    /// Text lines accumulate in a fill buffer that is emitted as a paragraph
    /// whenever a block-level request, a blank line or the end of input is
    /// reached. `ctx` controls where the run ends: inside an inset or a list
    /// item a heading ends it, inside a list item so do the paragraph and
    /// list requests, and inside an inset `.RE` does. When a request ends
    /// the run it is left unconsumed for the caller, except `.RE`, which is
    /// consumed.
    #[allow(clippy::too_many_lines, clippy::match_same_arms)]
    fn parse_blocks(&mut self, ctx: Ctx) -> Vec<Block> {
        let mut blocks = Vec::new();
        // Inlines of the paragraph being filled.
        let mut fill = Vec::new();
        // True once text was added since the last flush; `flush_para` uses it
        // to emit an empty paragraph when the text trimmed down to nothing.
        let mut started = false;
        while let Some(line) = self.peek().map(str::to_owned) {
            // A blank line ends the current paragraph.
            if line.is_empty() {
                flush_para(&mut fill, &mut blocks, &mut started);
                self.advance();
                continue;
            }
            // Anything that is not a control line is paragraph text.
            let Some((name, rest)) = control_parts(&line) else {
                self.advance();
                append_text(&mut fill, tokenize(&line, Font::Regular, &self.strings));
                started = true;
                continue;
            };
            if is_comment(&line) {
                self.advance();
                continue;
            }
            match name {
                // Section (level 1) and subsection (level 2) headings.
                "SH" | "SS" => {
                    if ctx.in_inset || ctx.in_item {
                        // Headings end insets and list items; the heading
                        // line stays unconsumed for the caller.
                        flush_para(&mut fill, &mut blocks, &mut started);
                        return blocks;
                    }
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                    let level = if name == "SH" { 1 } else { 2 };
                    let inlines = self.heading_inlines(rest);
                    let id = self.headings.assign(&inlines);
                    blocks.push(Block::Header(
                        level,
                        Box::new(Attr {
                            id: id.into(),
                            ..Attr::default()
                        }),
                        inlines,
                    ));
                }
                // Paragraph requests: end the paragraph, and end a list item.
                "PP" | "LP" | "P" | "HP" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    if ctx.in_item {
                        return blocks;
                    }
                    self.advance();
                }
                // List item requests: handled by `parse_list`, which consumes
                // the request line itself. Inside an item they end the item.
                "TP" | "IP" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    if ctx.in_item {
                        return blocks;
                    }
                    let list = self.parse_list();
                    blocks.extend(list);
                }
                // A `.TQ` that does not directly follow a `.TP` term is
                // dropped here.
                "TQ" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    if ctx.in_item {
                        return blocks;
                    }
                    self.advance();
                }
                // Inset: a block quote, or spliced into the item when the
                // inset appears inside a list item.
                "RS" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                    let inner = self.parse_blocks(Ctx::INSET);
                    if ctx.in_item {
                        blocks.extend(inner);
                    } else {
                        blocks.push(Block::BlockQuote(inner));
                    }
                }
                // End of inset: consumed always, ends the run only when this
                // call is parsing an inset.
                "RE" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                    if ctx.in_inset {
                        return blocks;
                    }
                }
                // Start of a no-fill / example region.
                "nf" | "EX" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                    blocks.push(self.parse_verbatim());
                }
                // Closing requests with no open region only break the
                // paragraph.
                "fi" | "EE" | "UE" | "ME" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                }
                // Table region.
                "TS" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                    blocks.extend(self.parse_tbl());
                }
                // String definition; does not break the paragraph.
                "ds" => {
                    self.advance();
                    self.define_string(rest);
                }
                // Line break inside the current paragraph.
                "br" => {
                    self.advance();
                    fill.push(Inline::LineBreak);
                }
                // Vertical space: ends the paragraph.
                "sp" => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                }
                // Title line: fills document metadata.
                "TH" => {
                    self.advance();
                    self.parse_title(rest);
                }
                // Single-font macros. With no arguments the next input line
                // is the text to style.
                "B" | "I" => {
                    self.advance();
                    let font = single_font(name);
                    let inlines = if rest.is_empty() {
                        let text = self.take_line().unwrap_or_default();
                        font.wrap(tokenize(&text, Font::Regular, &self.strings))
                    } else {
                        let text = split_args(rest).join(" ");
                        font.wrap_forced(tokenize(&text, Font::Regular, &self.strings))
                    };
                    append_text(&mut fill, inlines);
                    started = true;
                }
                // Alternating-font macros. With no arguments the next input
                // line supplies them.
                "BR" | "RB" | "BI" | "IB" | "RI" | "IR" => {
                    self.advance();
                    let rest = if rest.is_empty() {
                        self.take_line().unwrap_or_default()
                    } else {
                        rest.to_owned()
                    };
                    append_text(
                        &mut fill,
                        alternating(&rest, fonts_for(name), &self.strings),
                    );
                    started = true;
                }
                // Synopsis start: rendered as bold text.
                "SY" => {
                    self.advance();
                    let text = if rest.is_empty() {
                        self.take_line().unwrap_or_default()
                    } else {
                        split_args(rest).join(" ")
                    };
                    append_text(&mut fill, font_macro(Font::Bold, &text, &self.strings));
                    started = true;
                }
                // Optional argument in a synopsis, rendered in brackets.
                "OP" => {
                    self.advance();
                    append_text(&mut fill, option_synopsis(rest, &self.strings));
                    started = true;
                }
                // Synopsis end: nothing to emit.
                "YS" => {
                    self.advance();
                }
                // Hyperlink (`.UR`) or mail link (`.MT`, prefixed `mailto:`).
                "UR" | "MT" => {
                    self.advance();
                    let url = split_args(rest).into_iter().next().unwrap_or_default();
                    let url = if name == "MT" {
                        format!("mailto:{url}")
                    } else {
                        url
                    };
                    if self.link_label_is_plain() {
                        self.parse_link(url, &mut fill);
                        started = true;
                    } else {
                        // The label contains requests, so no link is built;
                        // the label content is emitted as its own paragraph.
                        flush_para(&mut fill, &mut blocks, &mut started);
                        blocks.extend(self.parse_aborted_link());
                    }
                }
                // Macro definition: first argument is the name, the optional
                // second one the end marker (default `.`, i.e. a `..` line).
                "de" | "de1" => {
                    self.advance();
                    let args = split_args(rest);
                    let end = args.get(1).map_or(".", String::as_str).to_owned();
                    let body = self.collect_macro_definition(&end);
                    if let Some(name) = args.into_iter().next() {
                        self.macros.insert(name, body);
                    }
                }
                // Conditional: when true, the branch replaces this line and
                // is parsed on the next iteration; otherwise the line is
                // dropped.
                "if" => {
                    let (cond, branch) = split_condition(rest);
                    if condition_true(cond) {
                        self.reprocess_as(branch);
                    } else {
                        self.advance();
                    }
                }
                // If-else: like `.if`, and records whether the following
                // `.el` should run.
                "ie" => {
                    let (cond, branch) = split_condition(rest);
                    let taken = condition_true(cond);
                    self.else_branch = !taken;
                    if taken {
                        self.reprocess_as(branch);
                    } else {
                        self.advance();
                    }
                }
                // Else branch: runs only after an `.ie` whose condition was
                // false.
                "el" => {
                    if self.else_branch {
                        self.else_branch = false;
                        self.reprocess_as(rest);
                    } else {
                        self.advance();
                    }
                }
                // Call of a user-defined macro: its expansion is queued ahead
                // of the remaining input. Pushing in reverse at the front
                // keeps the expansion in order.
                _ if self.macros.contains_key(name) => {
                    self.advance();
                    let args = split_args(rest);
                    let expansion = self.expand_macro_call(name, &args);
                    for line in expansion.into_iter().rev() {
                        self.pending.push_front(line);
                    }
                }
                // Names made only of `.` and `'` (including the empty name)
                // are skipped without breaking the paragraph.
                _ if is_noop_request(name) => {
                    self.advance();
                }
                // Any other request breaks the paragraph and is dropped.
                _ => {
                    flush_para(&mut fill, &mut blocks, &mut started);
                    self.advance();
                }
            }
        }
        flush_para(&mut fill, &mut blocks, &mut started);
        blocks
    }
612
613 fn heading_inlines(&mut self, rest: &str) -> Vec<Inline> {
616 if rest.is_empty() {
617 let next = self.take_line().unwrap_or_default();
618 tokenize(&next, Font::Regular, &self.strings)
619 } else {
620 tokenize(&split_args(rest).join(" "), Font::Regular, &self.strings)
621 }
622 }
623
624 fn parse_title(&mut self, rest: &str) {
626 let keys = ["title", "section", "date", "footer", "header"];
627 for (key, arg) in keys.iter().zip(split_args(rest)) {
628 if arg.is_empty() {
629 continue;
630 }
631 let inlines = tokenize(&arg, Font::Regular, &self.strings);
632 self.meta
633 .insert((*key).to_owned(), MetaValue::MetaInlines(inlines));
634 }
635 }
636
637 fn define_string(&mut self, rest: &str) {
641 let (name, value) = match rest.split_once([' ', '\t']) {
642 Some((name, value)) => (name, value),
643 None => (rest, ""),
644 };
645 if name.is_empty() {
646 return;
647 }
648 let value = match value.find("\\\"") {
649 Some(index) => value.get(..index).unwrap_or(value),
650 None => value,
651 };
652 let value = value.trim_end_matches([' ', '\t']);
653 self.strings.insert(name.to_owned(), value.to_owned());
654 }
655
656 fn parse_verbatim(&mut self) -> Block {
660 let mut text_lines: Vec<String> = Vec::new();
661 while let Some(line) = self.peek().map(str::to_owned) {
662 if let Some((name, rest)) = control_parts(&line) {
663 if is_comment(&line) {
664 self.advance();
665 continue;
666 }
667 match name {
668 "fi" | "EE" => {
669 self.advance();
670 break;
671 }
672 "SH" | "SS" => break,
673 "B" | "I" | "BR" | "RB" | "BI" | "IB" | "RI" | "IR" => {
674 self.advance();
675 text_lines.push(flatten(&split_args(rest).join(" "), &self.strings));
676 }
677 _ => self.advance(),
678 }
679 } else {
680 self.advance();
681 text_lines.push(flatten(&line, &self.strings));
682 }
683 }
684 Block::CodeBlock(Box::default(), text_lines.join("\n").into())
685 }
686
687 fn parse_tbl(&mut self) -> Vec<Block> {
693 let mut region: Vec<String> = Vec::new();
694 while let Some(line) = self.peek().map(str::to_owned) {
695 if let Some((name, _)) = control_parts(&line) {
696 if is_comment(&line) {
697 self.advance();
698 continue;
699 }
700 match name {
701 "TE" => {
702 self.advance();
703 break;
704 }
705 "SH" | "SS" => break,
706 _ => {
707 self.advance();
708 region.push(line);
709 }
710 }
711 } else {
712 self.advance();
713 region.push(line);
714 }
715 }
716 build_tbl(®ion).into_iter().collect()
717 }
718
719 fn parse_list(&mut self) -> Vec<Block> {
722 let mut out = Vec::new();
723 let mut pending: Option<Pending> = None;
724 while let Some(line) = self.peek().map(str::to_owned) {
725 let Some((name, rest)) = control_parts(&line) else {
726 break;
727 };
728 if is_comment(&line) {
729 self.advance();
730 continue;
731 }
732 match name {
733 "TP" => {
734 self.advance();
735 let mut term = self.read_term();
736 while self.peek_request() == Some("TQ") {
738 self.advance();
739 term.push(Inline::LineBreak);
740 term.extend(self.read_term());
741 }
742 let body = self.parse_blocks(Ctx::ITEM);
743 if body.is_empty() {
744 let rest = self.parse_list();
747 if rest.is_empty() {
748 flush_pending(&mut pending, &mut out);
749 out.push(Block::Para(term));
750 } else {
751 push_definition(&mut pending, &mut out, term, rest);
752 }
753 } else {
754 push_definition(&mut pending, &mut out, term, body);
755 }
756 }
757 "IP" => {
758 self.advance();
759 let args = split_args(rest);
760 match args.first() {
761 None => {
763 flush_pending(&mut pending, &mut out);
764 let body = self.parse_blocks(Ctx::ITEM);
765 if !body.is_empty() {
767 out.push(Block::BlockQuote(body));
768 }
769 }
770 Some(mark_raw) => {
771 let mark = flatten(mark_raw, &self.strings);
772 match classify_mark(&mark) {
773 Mark::Bullet => {
774 let body = self.item_body();
775 push_bullet(&mut pending, &mut out, body);
776 }
777 Mark::Ordered(attrs) => {
778 let body = self.item_body();
779 push_ordered(&mut pending, &mut out, attrs, body);
780 }
781 Mark::None | Mark::Text => {
784 let term = inlines_from_plain(&mark);
785 let body = self.item_body();
786 push_definition(&mut pending, &mut out, term, body);
787 }
788 }
789 }
790 }
791 }
792 _ => break,
793 }
794 }
795 flush_pending(&mut pending, &mut out);
796 out
797 }
798
799 fn item_body(&mut self) -> Vec<Block> {
802 let body = self.parse_blocks(Ctx::ITEM);
803 if body.is_empty() {
804 vec![Block::Para(Vec::new())]
805 } else {
806 body
807 }
808 }
809
810 fn read_term(&mut self) -> Vec<Inline> {
812 let Some(line) = self.take_line() else {
813 return Vec::new();
814 };
815 if let Some((name, rest)) = control_parts(&line) {
816 if is_comment(&line) {
817 return self.read_term();
818 }
819 match name {
820 "B" | "I" => {
821 let font = single_font(name);
822 return font_macro(font, &split_args(rest).join(" "), &self.strings);
823 }
824 "BR" | "RB" | "BI" | "IB" | "RI" | "IR" => {
825 return alternating(rest, fonts_for(name), &self.strings);
826 }
827 _ => return tokenize(rest, Font::Regular, &self.strings),
828 }
829 }
830 tokenize(&line, Font::Regular, &self.strings)
831 }
832
833 fn link_label_is_plain(&self) -> bool {
837 let lookahead = self
838 .pending
839 .iter()
840 .chain(self.lines.get(self.pos..).into_iter().flatten());
841 for line in lookahead {
842 if is_comment(line) {
843 continue;
844 }
845 if let Some((name, _)) = control_parts(line) {
846 return matches!(name, "UE" | "ME");
847 }
848 }
849 false
850 }
851
852 fn parse_link(&mut self, url: String, fill: &mut Vec<Inline>) {
856 let mut label_text = String::new();
857 let mut trailing = String::new();
858 while let Some(line) = self.peek().map(str::to_owned) {
859 if is_comment(&line) {
860 self.advance();
861 continue;
862 }
863 self.advance();
864 if let Some((name, rest)) = control_parts(&line) {
865 if matches!(name, "UE" | "ME") {
866 trailing = split_args(rest).join(" ");
867 }
868 break;
869 }
870 label_text.push_str(&line);
871 }
872 let label = tokenize(&label_text, Font::Regular, &self.strings);
873 append_text(
874 fill,
875 vec![Inline::Link(
876 Box::default(),
877 label,
878 Box::new(Target {
879 url: url.into(),
880 title: carta_ast::Text::default(),
881 }),
882 )],
883 );
884 if !trailing.is_empty() {
885 fill.extend(tokenize(&trailing, Font::Regular, &self.strings));
886 }
887 }
888
889 fn parse_aborted_link(&mut self) -> Vec<Block> {
893 let mut fill = Vec::new();
894 while let Some(line) = self.peek().map(str::to_owned) {
895 let Some((name, rest)) = control_parts(&line) else {
896 self.advance();
897 append_text(&mut fill, tokenize(&line, Font::Regular, &self.strings));
898 continue;
899 };
900 if is_comment(&line) {
901 self.advance();
902 continue;
903 }
904 match name {
905 "UE" | "ME" => {
906 self.advance();
907 break;
908 }
909 "br" => {
910 self.advance();
911 fill.push(Inline::LineBreak);
912 }
913 "B" | "I" => {
914 self.advance();
915 let font = single_font(name);
916 let inlines = if rest.is_empty() {
917 let text = self.take_line().unwrap_or_default();
918 font.wrap(tokenize(&text, Font::Regular, &self.strings))
919 } else {
920 let text = split_args(rest).join(" ");
921 font.wrap_forced(tokenize(&text, Font::Regular, &self.strings))
922 };
923 append_text(&mut fill, inlines);
924 }
925 "BR" | "RB" | "BI" | "IB" | "RI" | "IR" => {
926 self.advance();
927 let rest = if rest.is_empty() {
928 self.take_line().unwrap_or_default()
929 } else {
930 rest.to_owned()
931 };
932 append_text(
933 &mut fill,
934 alternating(&rest, fonts_for(name), &self.strings),
935 );
936 }
937 _ => break,
938 }
939 }
940 trim_inline_ends(&mut fill);
941 if fill.is_empty() {
942 Vec::new()
943 } else {
944 vec![Block::Para(fill)]
945 }
946 }
947}
948
949fn single_font(name: &str) -> Font {
951 if name == "B" {
952 Font::Bold
953 } else {
954 Font::Italic
955 }
956}
957
958fn fonts_for(name: &str) -> [Font; 2] {
960 match name {
961 "BR" => [Font::Bold, Font::Regular],
962 "RB" => [Font::Regular, Font::Bold],
963 "BI" => [Font::Bold, Font::Italic],
964 "IB" => [Font::Italic, Font::Bold],
965 "RI" => [Font::Regular, Font::Italic],
966 _ => [Font::Italic, Font::Regular],
967 }
968}
969
970fn font_macro(font: Font, text: &str, strings: &Strings) -> Vec<Inline> {
974 font.wrap(tokenize(text, Font::Regular, strings))
975}
976
977fn alternating(rest: &str, fonts: [Font; 2], strings: &Strings) -> Vec<Inline> {
980 let mut out = Vec::new();
981 for (index, arg) in split_args(rest).into_iter().enumerate() {
982 let font = fonts.get(index % 2).copied().unwrap_or(Font::Regular);
983 out.extend(font.wrap(tokenize(&arg, Font::Regular, strings)));
984 }
985 out
986}
987
988fn option_synopsis(rest: &str, strings: &Strings) -> Vec<Inline> {
991 let args = split_args(rest);
992 let mut out = vec![Inline::Str("[".into())];
993 if let Some(name) = args.first() {
994 out.push(Inline::Space);
995 out.extend(font_macro(Font::Bold, name, strings));
996 }
997 let argument = args.get(1..).unwrap_or(&[]).join(" ");
998 if !argument.is_empty() {
999 out.push(Inline::Space);
1000 out.extend(tokenize(&argument, Font::Regular, strings));
1001 }
1002 out.push(Inline::Space);
1003 out.push(Inline::Str("]".into()));
1004 out
1005}
1006
/// Classification of an `.IP` mark, as produced by `classify_mark`.
enum Mark {
    /// The mark is empty.
    None,
    /// The mark is one of the bullet characters.
    Bullet,
    /// The mark is an enumerator; carries the list attributes it implies.
    Ordered(ListAttributes),
    /// Any other mark; used as the term of a definition item.
    Text,
}
1014
1015fn inlines_from_plain(text: &str) -> Vec<Inline> {
1018 let mut out = Vec::new();
1019 for word in text.split_whitespace() {
1020 if !out.is_empty() {
1021 out.push(Inline::Space);
1022 }
1023 out.push(Inline::Str(word.into()));
1024 }
1025 out
1026}
1027
1028fn classify_mark(mark: &str) -> Mark {
1031 if mark.is_empty() {
1032 return Mark::None;
1033 }
1034 if matches!(mark, "*" | "\u{2022}" | "\u{00b7}" | "-" | "+") {
1035 return Mark::Bullet;
1036 }
1037 if let Some(attrs) = parse_enumerator(mark) {
1038 return Mark::Ordered(attrs);
1039 }
1040 Mark::Text
1041}
1042
1043fn parse_enumerator(mark: &str) -> Option<ListAttributes> {
1046 if let Some(inner) = mark.strip_prefix('(').and_then(|m| m.strip_suffix(')')) {
1047 return enumerator_body(inner, ListNumberDelim::TwoParens);
1048 }
1049 let (body, delim) = match mark.strip_suffix('.') {
1050 Some(body) => (body, ListNumberDelim::Period),
1051 None => match mark.strip_suffix(')') {
1052 Some(body) => (body, ListNumberDelim::OneParen),
1053 None => (mark, ListNumberDelim::DefaultDelim),
1054 },
1055 };
1056 enumerator_body(body, delim)
1057}
1058
1059fn enumerator_body(body: &str, delim: ListNumberDelim) -> Option<ListAttributes> {
1062 if body.is_empty() {
1063 return None;
1064 }
1065 if body.chars().all(|c| c.is_ascii_digit()) {
1066 let start = body.parse().ok()?;
1067 return Some(ListAttributes {
1068 start,
1069 style: ListNumberStyle::Decimal,
1070 delim,
1071 });
1072 }
1073 if let Some(start) = roman_value(body) {
1074 let style = if body.chars().next().is_some_and(char::is_uppercase) {
1075 ListNumberStyle::UpperRoman
1076 } else {
1077 ListNumberStyle::LowerRoman
1078 };
1079 return Some(ListAttributes {
1080 start,
1081 style,
1082 delim,
1083 });
1084 }
1085 let mut chars = body.chars();
1086 if let (Some(c), None) = (chars.next(), chars.next())
1087 && c.is_ascii_alphabetic()
1088 {
1089 let start = i32::from((c.to_ascii_lowercase() as u8) - b'a') + 1;
1090 let style = if c.is_ascii_uppercase() {
1091 ListNumberStyle::UpperAlpha
1092 } else {
1093 ListNumberStyle::LowerAlpha
1094 };
1095 return Some(ListAttributes {
1096 start,
1097 style,
1098 delim,
1099 });
1100 }
1101 None
1102}
1103
/// Value of `text` read as a roman numeral, case-insensitively.
///
/// A digit smaller than its right neighbour is subtracted, every other digit
/// is added; no further validation is done. Returns `None` if any character
/// is not a roman digit or if the total is not positive (e.g. empty text).
fn roman_value(text: &str) -> Option<i32> {
    let mut values: Vec<i32> = Vec::with_capacity(text.len());
    for c in text.chars() {
        values.push(match c.to_ascii_lowercase() {
            'i' => 1,
            'v' => 5,
            'x' => 10,
            'l' => 50,
            'c' => 100,
            'd' => 500,
            'm' => 1000,
            _ => return None,
        });
    }
    let mut total = 0;
    // Walks one position ahead of the main loop to supply the right neighbour.
    let mut following = values.iter().skip(1);
    for &value in &values {
        match following.next() {
            Some(&next) if value < next => total -= value,
            _ => total += value,
        }
    }
    if total > 0 {
        Some(total)
    } else {
        None
    }
}
1128
/// A list still being accumulated by `parse_list`; `flush_pending` turns it
/// into the corresponding list block.
enum Pending {
    /// Definition list items: each a term with its definitions.
    Definition(Vec<(Vec<Inline>, Vec<Vec<Block>>)>),
    /// Bullet list items.
    Bullet(Vec<Vec<Block>>),
    /// Ordered list attributes and items.
    Ordered(ListAttributes, Vec<Vec<Block>>),
}
1136
1137fn flush_pending(pending: &mut Option<Pending>, out: &mut Vec<Block>) {
1138 match pending.take() {
1139 Some(Pending::Definition(items)) => out.push(Block::DefinitionList(items)),
1140 Some(Pending::Bullet(items)) => out.push(Block::BulletList(items)),
1141 Some(Pending::Ordered(attrs, items)) => out.push(Block::OrderedList(attrs, items)),
1142 None => {}
1143 }
1144}
1145
1146fn push_definition(
1147 pending: &mut Option<Pending>,
1148 out: &mut Vec<Block>,
1149 term: Vec<Inline>,
1150 body: Vec<Block>,
1151) {
1152 if let Some(Pending::Definition(items)) = pending {
1153 items.push((term, vec![body]));
1154 return;
1155 }
1156 flush_pending(pending, out);
1157 *pending = Some(Pending::Definition(vec![(term, vec![body])]));
1158}
1159
1160fn push_bullet(pending: &mut Option<Pending>, out: &mut Vec<Block>, body: Vec<Block>) {
1161 if let Some(Pending::Bullet(items)) = pending {
1162 items.push(body);
1163 return;
1164 }
1165 flush_pending(pending, out);
1166 *pending = Some(Pending::Bullet(vec![body]));
1167}
1168
1169fn push_ordered(
1170 pending: &mut Option<Pending>,
1171 out: &mut Vec<Block>,
1172 attrs: ListAttributes,
1173 body: Vec<Block>,
1174) {
1175 if let Some(Pending::Ordered(_, items)) = pending {
1176 items.push(body);
1177 return;
1178 }
1179 flush_pending(pending, out);
1180 *pending = Some(Pending::Ordered(attrs, vec![body]));
1181}
1182
1183fn flush_para(fill: &mut Vec<Inline>, blocks: &mut Vec<Block>, started: &mut bool) {
1187 let mut trimmed = std::mem::take(fill);
1188 trim_inline_ends(&mut trimmed);
1189 if !trimmed.is_empty() {
1190 blocks.push(Block::Para(trimmed));
1191 } else if *started {
1192 blocks.push(Block::Para(Vec::new()));
1193 }
1194 *started = false;
1195}
1196
1197fn append_text(fill: &mut Vec<Inline>, inlines: Vec<Inline>) {
1200 if inlines.is_empty() {
1201 return;
1202 }
1203 if !fill.is_empty() && !matches!(fill.last(), Some(Inline::LineBreak)) {
1204 fill.push(Inline::Space);
1205 }
1206 fill.extend(inlines);
1207}
1208
/// Whether `line` is a control line, i.e. starts with `.` or `'`.
fn is_control(line: &str) -> bool {
    matches!(line.as_bytes().first(), Some(b'.' | b'\''))
}
1213
1214fn is_comment(line: &str) -> bool {
1216 if !is_control(line) {
1217 return false;
1218 }
1219 let body = line.get(1..).unwrap_or("");
1220 body.starts_with("\\\"") || body.starts_with("\\#")
1221}
1222
1223fn control_parts(line: &str) -> Option<(&str, &str)> {
1227 if !is_control(line) {
1228 return None;
1229 }
1230 let body = line.get(1..).unwrap_or("").trim_start_matches([' ', '\t']);
1231 match body.split_once([' ', '\t']) {
1232 Some((name, rest)) => Some((name, rest.trim_start_matches([' ', '\t']))),
1233 None => Some((body, "")),
1234 }
1235}
1236
/// Whether a request name consists solely of `.` and `'` characters; the
/// empty name counts as well.
fn is_noop_request(name: &str) -> bool {
    !name.chars().any(|c| c != '.' && c != '\'')
}
1243
/// Splits the arguments of a conditional request at the first blank into
/// the condition and the branch text. Without a blank the whole input is the
/// condition and the branch is empty.
fn split_condition(rest: &str) -> (&str, &str) {
    rest.split_once([' ', '\t']).unwrap_or((rest, ""))
}
1252
/// Evaluates a conditional: only the conditions `n` and `1` are true;
/// everything else is false.
fn condition_true(cond: &str) -> bool {
    matches!(cond, "n" | "1")
}
1259
/// Splits request arguments.
///
/// Arguments are separated by blanks. An argument starting with `"` runs to
/// the closing `"` (or the end of input) and may contain blanks; inside it
/// `""` stands for one literal quote. In an unquoted argument a backslash
/// keeps the following character — even a blank — inside the argument; both
/// characters are retained.
fn split_args(input: &str) -> Vec<String> {
    let mut args = Vec::new();
    let mut chars = input.chars().peekable();
    loop {
        // Skip the blanks in front of the next argument.
        while chars.next_if(|c| matches!(c, ' ' | '\t')).is_some() {}
        let Some(&first) = chars.peek() else {
            break;
        };
        let mut arg = String::new();
        if first == '"' {
            chars.next();
            while let Some(c) = chars.next() {
                if c != '"' {
                    arg.push(c);
                    continue;
                }
                // A doubled quote is a literal quote; a single one closes
                // the argument.
                if chars.next_if_eq(&'"').is_none() {
                    break;
                }
                arg.push('"');
            }
        } else {
            while let Some(c) = chars.next_if(|c| !matches!(c, ' ' | '\t')) {
                arg.push(c);
                if c == '\\' {
                    // The escaped character never ends the argument.
                    arg.extend(chars.next());
                }
            }
        }
        args.push(arg);
    }
    args
}
1306
/// Applies copy-mode reduction to a macro body line: scanning left to right,
/// every pair of backslashes becomes a single backslash. Everything else is
/// kept as is.
fn reduce_copy_mode(line: &str) -> String {
    // `replace` matches non-overlapping pairs from the left, which is the
    // same pairing a character-by-character scan produces.
    line.replace("\\\\", "\\")
}
1329
1330fn substitute_macro_args(line: &str, args: &[String]) -> String {
1331 if !line.contains("\\$") {
1332 return line.to_owned();
1333 }
1334 let mut out = String::with_capacity(line.len());
1335 let mut chars = line.chars().peekable();
1336 while let Some(c) = chars.next() {
1337 if c != '\\' {
1338 out.push(c);
1339 continue;
1340 }
1341 match chars.peek() {
1342 Some('$') => {
1343 chars.next();
1344 push_macro_arg(&mut chars, args, &mut out);
1345 }
1346 Some('\\') => {
1349 chars.next();
1350 out.push('\\');
1351 out.push('\\');
1352 }
1353 _ => out.push('\\'),
1354 }
1355 }
1356 out
1357}
1358
/// Expands one macro argument reference. `chars` is positioned right after
/// `\$`; the selector that follows decides what is appended to `out`:
///
/// * a digit `1`-`9`: that argument, or nothing when the call did not supply
///   it (`0` is consumed and yields nothing);
/// * `*`: all arguments joined by single spaces;
/// * `@`: all arguments, each wrapped in double quotes (embedded quotes
///   doubled so `split_args` reads them back unchanged), joined by single
///   spaces.
///
/// Any other character is not a selector: it is left in `chars` and nothing
/// is appended.
fn push_macro_arg(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
    args: &[String],
    out: &mut String,
) {
    let Some(&selector) = chars.peek() else {
        return;
    };
    match selector {
        '*' => {
            chars.next();
            out.push_str(&args.join(" "));
        }
        '@' => {
            chars.next();
            for (index, arg) in args.iter().enumerate() {
                if index > 0 {
                    out.push(' ');
                }
                out.push('"');
                out.push_str(&arg.replace('"', "\"\""));
                out.push('"');
            }
        }
        _ => {
            if let Some(index) = selector.to_digit(10) {
                chars.next();
                // Arguments are numbered from 1; a single decimal digit
                // always fits in `usize`.
                let position = (index as usize).checked_sub(1);
                if let Some(arg) = position.and_then(|position| args.get(position)) {
                    out.push_str(arg);
                }
            }
        }
    }
}
1377
/// Unit produced by scanning a line of text.
enum Atom {
    /// A character together with the font in effect where it occurred.
    Char(Font, char),
    /// A blank; carries the blank character itself.
    Space(char),
}
1384
1385fn tokenize(text: &str, start_font: Font, strings: &Strings) -> Vec<Inline> {
1389 let atoms = scan(text, start_font, strings);
1390 let mut result: Vec<Inline> = Vec::new();
1391 let mut run: Vec<Inline> = Vec::new();
1392 let mut run_font = Font::Regular;
1393 let mut word = String::new();
1394 let mut word_font = Font::Regular;
1395 let mut pending_space = false;
1396
1397 let commit_word = |word: &mut String,
1398 word_font: Font,
1399 run: &mut Vec<Inline>,
1400 run_font: &mut Font,
1401 result: &mut Vec<Inline>,
1402 pending_space: &mut bool| {
1403 if word.is_empty() {
1404 return;
1405 }
1406 let text = std::mem::take(word);
1407 if !run.is_empty() && word_font == *run_font {
1408 if *pending_space {
1409 run.push(Inline::Space);
1410 }
1411 run.push(Inline::Str(text.into()));
1412 } else {
1413 flush_run(run, *run_font, result);
1414 if *pending_space {
1415 push_space(result);
1416 }
1417 *run_font = word_font;
1418 run.push(Inline::Str(text.into()));
1419 }
1420 *pending_space = false;
1421 };
1422
1423 for atom in atoms {
1424 match atom {
1425 Atom::Char(font, c) => {
1426 if !word.is_empty() && font != word_font {
1427 commit_word(
1428 &mut word,
1429 word_font,
1430 &mut run,
1431 &mut run_font,
1432 &mut result,
1433 &mut pending_space,
1434 );
1435 }
1436 if word.is_empty() {
1437 word_font = font;
1438 }
1439 word.push(c);
1440 }
1441 Atom::Space(_) => {
1442 commit_word(
1443 &mut word,
1444 word_font,
1445 &mut run,
1446 &mut run_font,
1447 &mut result,
1448 &mut pending_space,
1449 );
1450 pending_space = true;
1451 }
1452 }
1453 }
1454 commit_word(
1455 &mut word,
1456 word_font,
1457 &mut run,
1458 &mut run_font,
1459 &mut result,
1460 &mut pending_space,
1461 );
1462 flush_run(&mut run, run_font, &mut result);
1463 trim_inline_ends(&mut result);
1464 result
1465}
1466
1467fn flush_run(run: &mut Vec<Inline>, run_font: Font, result: &mut Vec<Inline>) {
1468 if !run.is_empty() {
1469 result.extend(run_font.wrap(std::mem::take(run)));
1470 }
1471}
1472
1473fn push_space(result: &mut Vec<Inline>) {
1475 if !result.is_empty() && !matches!(result.last(), Some(Inline::Space)) {
1476 result.push(Inline::Space);
1477 }
1478}
1479
1480fn flatten(text: &str, strings: &Strings) -> String {
1483 let mut out = String::new();
1484 for atom in scan(text, Font::Regular, strings) {
1485 match atom {
1486 Atom::Char(_, c) | Atom::Space(c) => out.push(c),
1487 }
1488 }
1489 out
1490}
1491
1492fn scan(text: &str, start_font: Font, strings: &Strings) -> Vec<Atom> {
1494 let mut atoms = Vec::new();
1495 let mut font = start_font;
1496 let mut previous = start_font;
1497 scan_into(text, &mut font, &mut previous, &mut atoms, strings, 0);
1498 atoms
1499}
1500
1501#[allow(clippy::too_many_lines, clippy::match_same_arms)]
1507fn scan_into(
1508 text: &str,
1509 font: &mut Font,
1510 previous: &mut Font,
1511 atoms: &mut Vec<Atom>,
1512 strings: &Strings,
1513 depth: usize,
1514) {
1515 let mut chars = text.chars().peekable();
1516 while let Some(c) = chars.next() {
1517 if c == ' ' || c == '\t' {
1518 atoms.push(Atom::Space(c));
1519 continue;
1520 }
1521 if c != '\\' {
1522 atoms.push(Atom::Char(*font, c));
1523 continue;
1524 }
1525 let Some(&escape) = chars.peek() else {
1526 break;
1527 };
1528 match escape {
1529 'f' => {
1530 chars.next();
1531 let name = read_escape_name(&mut chars);
1532 apply_font(&name, font, previous);
1533 }
1534 '"' | '#' => break,
1535 '-' => {
1536 chars.next();
1537 atoms.push(Atom::Char(*font, '-'));
1538 }
1539 'e' | '\\' => {
1540 chars.next();
1541 atoms.push(Atom::Char(*font, '\\'));
1542 }
1543 '.' => {
1544 chars.next();
1545 atoms.push(Atom::Char(*font, '.'));
1546 }
1547 ' ' => {
1550 chars.next();
1551 atoms.push(Atom::Space(' '));
1552 }
1553 't' => {
1554 chars.next();
1555 atoms.push(Atom::Space('\t'));
1556 }
1557 '~' => {
1558 chars.next();
1559 atoms.push(Atom::Char(*font, '\u{00a0}'));
1560 }
1561 '0' => {
1562 chars.next();
1563 atoms.push(Atom::Char(*font, '\u{2007}'));
1564 }
1565 '^' => {
1566 chars.next();
1567 atoms.push(Atom::Char(*font, '\u{200a}'));
1568 }
1569 '|' => {
1570 chars.next();
1571 atoms.push(Atom::Char(*font, '\u{2006}'));
1572 }
1573 '&' | ')' | ',' | '/' | ':' | '!' | '%' | '{' | '}' | 'c' | 'u' | 'd' => {
1576 chars.next();
1577 }
1578 '(' => {
1579 chars.next();
1580 let name: String = (&mut chars).take(2).collect();
1581 push_chars(atoms, *font, special_char(&name));
1582 }
1583 '[' => {
1584 chars.next();
1585 let name = read_delimited(&mut chars, ']');
1586 push_chars(atoms, *font, bracket_char(&name));
1587 }
1588 '*' => {
1589 chars.next();
1590 let name = read_escape_name(&mut chars);
1591 if depth < MAX_STRING_DEPTH
1592 && let Some(value) = strings.get(&name)
1593 {
1594 scan_into(value, font, previous, atoms, strings, depth + 1);
1595 }
1596 }
1597 's' => {
1598 chars.next();
1599 skip_size(&mut chars);
1600 }
1601 'n' | 'k' => {
1603 chars.next();
1604 let _ = read_escape_name(&mut chars);
1605 }
1606 'z' => {
1608 chars.next();
1609 chars.next();
1610 }
1611 'm' | 'M' | 'F' | 'g' | 'V' | 'Y' | '$' => {
1615 chars.next();
1616 let _ = read_escape_name(&mut chars);
1617 }
1618 'p' | 'a' => {
1620 chars.next();
1621 }
1622 'C' => {
1624 chars.next();
1625 let name = match chars.next() {
1626 Some(delim) => read_delimited(&mut chars, delim),
1627 None => String::new(),
1628 };
1629 push_chars(atoms, *font, bracket_char(&name));
1630 }
1631 'h' | 'v' | 'w' | 'o' | 'b' | 'l' | 'L' | 'D' | 'N' | 'R' | 'A' | 'Z' | 'X' | 'B' => {
1632 chars.next();
1633 skip_delimited_arg(&mut chars);
1634 }
1635 other => {
1636 chars.next();
1637 atoms.push(Atom::Char(*font, other));
1638 }
1639 }
1640 }
1641}
1642
1643fn push_chars(atoms: &mut Vec<Atom>, font: Font, mapped: Option<char>) {
1644 atoms.push(Atom::Char(font, mapped.unwrap_or('\u{fffd}')));
1645}
1646
/// Reads the name argument of an escape such as `\f` or `\*`.
///
/// Three spellings are accepted: `(xx` yields the next two characters,
/// `[name]` yields everything up to the closing bracket (which is consumed),
/// and any other character is a one-character name. An exhausted iterator
/// yields the empty string.
fn read_escape_name(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
    let Some(&lead) = chars.peek() else {
        return String::new();
    };
    chars.next();
    match lead {
        '(' => chars.by_ref().take(2).collect(),
        // `take_while` also swallows the `]` that ends the name.
        '[' => chars.by_ref().take_while(|&c| c != ']').collect(),
        single => single.to_string(),
    }
}
1663
/// Collects characters up to, but not including, the first `close`.
///
/// The closing character is consumed. If it never appears, the rest of the
/// input is collected and the iterator is left exhausted.
fn read_delimited(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, close: char) -> String {
    chars.by_ref().take_while(|&c| c != close).collect()
}
1674
/// Skips a delimited escape argument such as the `'1i'` of `\h'1i'`.
///
/// The first character is taken as the delimiter; everything up to and
/// including its next occurrence is consumed. Without a closing delimiter
/// the rest of the input is consumed.
fn skip_delimited_arg(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) {
    if let Some(delim) = chars.next() {
        // Stops right after consuming the closing delimiter, or at the end.
        while chars.next().is_some_and(|c| c != delim) {}
    }
}
1686
/// Skips the argument of a `\s` (type size) escape; the size is discarded.
///
/// The following spellings are consumed, where `±` is an optional `+` or `-`:
///
/// * `±N` or `±NN` - bare digits (at most two);
/// * `(NN`, `±(NN`, `(±NN` - two characters after a parenthesis;
/// * `[...]`, `±[...]` - everything through the closing bracket;
/// * `'...'`, `±'...'` - everything through the closing quote.
///
/// The sign may come before the bracketing as well as inside it; leaving the
/// sign-first forms unhandled would let the bracket and digits leak into the
/// text. Anything that does not look like a size is left untouched.
fn skip_size(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) {
    let leading_sign = chars.next_if(|&c| matches!(c, '+' | '-')).is_some();
    match chars.peek() {
        Some('(') => {
            chars.next();
            // `\s(+NN`: the sign sits inside the parenthesis instead.
            if !leading_sign && matches!(chars.peek(), Some('+' | '-')) {
                chars.next();
            }
            chars.next();
            chars.next();
        }
        Some('[') => {
            chars.next();
            while chars.next().is_some_and(|c| c != ']') {}
        }
        Some('\'') => {
            chars.next();
            while chars.next().is_some_and(|c| c != '\'') {}
        }
        _ => {
            for _ in 0..2 {
                if chars.next_if(char::is_ascii_digit).is_none() {
                    break;
                }
            }
        }
    }
}
1718
1719#[allow(clippy::match_same_arms)]
1723fn apply_font(name: &str, font: &mut Font, previous: &mut Font) {
1724 let next = match name {
1725 "B" => Font::Bold,
1726 "I" => Font::Italic,
1727 "BI" | "IB" => Font::BoldItalic,
1728 "C" | "CW" | "CR" => Font::Mono,
1729 "CB" => Font::MonoBold,
1730 "CI" => Font::MonoItalic,
1731 "R" => Font::Regular,
1732 "P" | "" => {
1733 std::mem::swap(font, previous);
1734 return;
1735 }
1736 _ => Font::Regular,
1737 };
1738 *previous = *font;
1739 *font = next;
1740}
1741
1742fn bracket_char(name: &str) -> Option<char> {
1744 if let Some(hex) = name.strip_prefix('u') {
1745 return u32::from_str_radix(hex, 16).ok().and_then(char::from_u32);
1746 }
1747 special_char(name)
1748}
1749
1750fn build_tbl(region: &[String]) -> Option<Block> {
1759 let mut index = 0;
1760 let mut separator = "\t".to_owned();
1761 if let Some(first) = region.first()
1762 && first.trim_end().ends_with(';')
1763 {
1764 if let Some(sep) = tab_option(first) {
1765 separator = sep;
1766 }
1767 index = 1;
1768 }
1769
1770 let aligns = parse_col_aligns(region.get(index)?);
1771 if aligns.is_empty() {
1772 return None;
1773 }
1774 let columns = aligns.len();
1775 let mut data_start = None;
1776 for (offset, line) in region.iter().enumerate().skip(index) {
1777 if line.trim_end().ends_with('.') {
1778 data_start = Some(offset + 1);
1779 break;
1780 }
1781 }
1782 let data_start = data_start?;
1783
1784 if region
1787 .get(index..data_start)
1788 .unwrap_or(&[])
1789 .iter()
1790 .any(|line| format_has_span(line))
1791 {
1792 return Some(Block::Para(vec![Inline::Str("TABLE".into())]));
1793 }
1794
1795 let data = collapse_text_blocks(region.get(data_start..).unwrap_or(&[]), &separator);
1796
1797 let (head_lines, body_lines): (&[String], &[String]) =
1798 if data.get(1).is_some_and(|line| is_rule(line)) {
1799 (data.get(..1).unwrap_or(&[]), data.get(2..).unwrap_or(&[]))
1800 } else {
1801 (&[], &data)
1802 };
1803
1804 let col_specs = aligns
1805 .into_iter()
1806 .map(|align| ColSpec {
1807 align,
1808 width: ColWidth::ColWidthDefault,
1809 })
1810 .collect();
1811 let head = TableHead {
1812 attr: Attr::default(),
1813 rows: head_lines
1814 .iter()
1815 .map(|line| tbl_row(line, &separator, columns))
1816 .collect(),
1817 };
1818 let body = TableBody {
1819 attr: Attr::default(),
1820 row_head_columns: 0,
1821 head: Vec::new(),
1822 body: body_lines
1823 .iter()
1824 .filter(|line| !is_rule(line))
1825 .map(|line| tbl_row(line, &separator, columns))
1826 .collect(),
1827 };
1828
1829 Some(Block::Table(Box::new(Table {
1830 attr: Attr::default(),
1831 caption: Caption::default(),
1832 col_specs,
1833 head,
1834 bodies: vec![body],
1835 foot: TableFoot::default(),
1836 })))
1837}
1838
/// Extracts the argument of a `tab(x)` option from a tbl options line.
///
/// Returns the text between `tab(` and the next `)`, or `None` when the
/// option is absent, unterminated, or empty.
fn tab_option(options: &str) -> Option<String> {
    let (_, after_open) = options.split_once("tab(")?;
    let (inside, _) = after_open.split_once(')')?;
    if inside.is_empty() {
        None
    } else {
        Some(inside.to_owned())
    }
}
1844
1845fn parse_col_aligns(spec: &str) -> Vec<Alignment> {
1849 let mut aligns = Vec::new();
1850 let mut chars = spec.chars().peekable();
1851 while let Some(c) = chars.next() {
1852 match c.to_ascii_lowercase() {
1853 'l' | 'a' => aligns.push(Alignment::AlignLeft),
1854 'r' | 'n' => aligns.push(Alignment::AlignRight),
1855 'c' => aligns.push(Alignment::AlignCenter),
1856 'f' => match chars.peek() {
1857 Some('(') => {
1858 chars.next();
1859 chars.next();
1860 chars.next();
1861 }
1862 Some('[') => {
1863 chars.next();
1864 read_delimited(&mut chars, ']');
1865 }
1866 Some(_) => {
1867 chars.next();
1868 }
1869 None => {}
1870 },
1871 'w' | 'p' | 'v' | 'm' => {
1872 if chars.peek() == Some(&'(') {
1873 chars.next();
1874 for d in chars.by_ref() {
1875 if d == ')' {
1876 break;
1877 }
1878 }
1879 } else {
1880 while matches!(chars.peek(), Some(d) if d.is_ascii_digit()) {
1881 chars.next();
1882 }
1883 }
1884 }
1885 _ => {}
1886 }
1887 }
1888 aligns
1889}
1890
/// Reports whether a tbl format line contains the `s` (span) key.
///
/// The check is case-insensitive. Modifier arguments are skipped first so
/// that an `s` inside a font name or a width does not count: `f` takes `X`,
/// `(XX` or `[name]`, and `w`, `p`, `v`, `m` take either a parenthesised
/// argument or trailing digits.
fn format_has_span(spec: &str) -> bool {
    let mut rest = spec.chars().peekable();
    while let Some(key) = rest.next() {
        match key.to_ascii_lowercase() {
            's' => return true,
            'f' => {
                // The character after `f` decides how long the font name is.
                let Some(opener) = rest.next() else {
                    continue;
                };
                match opener {
                    '(' => {
                        rest.next();
                        rest.next();
                    }
                    '[' => while rest.next().is_some_and(|c| c != ']') {},
                    _ => {}
                }
            }
            'w' | 'p' | 'v' | 'm' => {
                if rest.next_if_eq(&'(').is_some() {
                    while rest.next().is_some_and(|c| c != ')') {}
                } else {
                    while rest.next_if(char::is_ascii_digit).is_some() {}
                }
            }
            _ => {}
        }
    }
    false
}
1932
/// Folds tbl text blocks into single-line rows.
///
/// A field that is exactly `T{` opens a text block: the following lines, up
/// to a line starting with `T}`, are joined with spaces and become that
/// field's content. The fields that follow `T}` on its line continue the same
/// row, and any of them may be `T{` again, opening a further block for the
/// next column. An unterminated block runs to the end of `data`.
///
/// Lines without a `T{` field are copied unchanged. The returned rows use
/// `separator` between fields, like the input.
fn collapse_text_blocks(data: &[String], separator: &str) -> Vec<String> {
    let opens_block = |field: &str| field.trim() == "T{";

    let mut out = Vec::new();
    let mut index = 0;
    while let Some(line) = data.get(index) {
        index += 1;
        if !line.split(separator).any(opens_block) {
            out.push(line.clone());
            continue;
        }

        let mut fields: Vec<String> = Vec::new();
        // Fields of the current row that still have to be examined. A `T}`
        // line puts its trailing fields at the front so they are examined
        // next and can open another block themselves.
        let mut pending: std::collections::VecDeque<&str> = line.split(separator).collect();
        while let Some(field) = pending.pop_front() {
            if !opens_block(field) {
                fields.push(field.to_owned());
                continue;
            }
            let mut block: Vec<&str> = Vec::new();
            while let Some(block_line) = data.get(index) {
                index += 1;
                if block_line.trim_start().starts_with("T}") {
                    // Skip the `T}` itself; the rest continues the row.
                    let tail: Vec<&str> = block_line.split(separator).skip(1).collect();
                    for continued in tail.into_iter().rev() {
                        pending.push_front(continued);
                    }
                    break;
                }
                block.push(block_line);
            }
            fields.push(block.join(" "));
        }
        out.push(fields.join(separator));
    }
    out
}
1973
/// Reports whether a tbl data line is a horizontal rule: after trimming, it
/// is non-empty and consists only of `_` and `=` characters.
fn is_rule(line: &str) -> bool {
    let body = line.trim();
    if body.is_empty() {
        return false;
    }
    body.chars().all(|c| matches!(c, '_' | '='))
}
1979
1980fn tbl_row(line: &str, separator: &str, columns: usize) -> Row {
1983 let mut cells: Vec<Cell> = line.split(separator).take(columns).map(tbl_cell).collect();
1984 while cells.len() < columns {
1985 cells.push(tbl_cell(""));
1986 }
1987 Row {
1988 attr: Attr::default(),
1989 cells,
1990 }
1991}
1992
1993fn tbl_cell(field: &str) -> Cell {
1996 let cleaned: String = field.chars().filter(|&c| c != '\\').collect();
1997 let mut inlines = Vec::new();
1998 for word in cleaned.split_whitespace() {
1999 if !inlines.is_empty() {
2000 inlines.push(Inline::Space);
2001 }
2002 inlines.push(Inline::Str(word.into()));
2003 }
2004 let content = if inlines.is_empty() {
2005 Vec::new()
2006 } else {
2007 vec![Block::Plain(inlines)]
2008 };
2009 Cell {
2010 attr: Attr::default(),
2011 align: Alignment::AlignDefault,
2012 row_span: 1,
2013 col_span: 1,
2014 content,
2015 }
2016}
2017
/// Maps a roff special-character name (the `xx` of `\(xx`) to the character
/// it stands for.
///
/// Names are case-sensitive. Returns `None` for names that are not in the
/// table; callers decide what to substitute in that case.
#[allow(clippy::match_same_arms, clippy::too_many_lines)]
fn special_char(name: &str) -> Option<char> {
    let c = match name {
        // Hyphen, dashes and quotation marks.
        "hy" => '\u{2010}',
        "en" => '\u{2013}',
        "em" => '\u{2014}',
        "lq" => '\u{201c}',
        "rq" => '\u{201d}',
        "oq" => '\u{2018}',
        "cq" => '\u{2019}',
        "aq" => '\'',
        "dq" => '"',
        "Bq" => '\u{201e}',
        "bq" => '\u{201a}',
        "Fo" => '\u{00ab}',
        "Fc" => '\u{00bb}',
        "fo" => '\u{2039}',
        "fc" => '\u{203a}',
        // Spacing accents and ASCII punctuation.
        "ga" => '`',
        "aa" => '\u{00b4}',
        "ha" => '^',
        "ti" => '~',
        "ul" => '_',
        "ru" => '_',
        "rs" => '\\',
        "sl" => '/',
        // Bullets, marks and legal symbols. Note that `bu` maps to
        // U+00B7 MIDDLE DOT, not U+2022 BULLET.
        "bu" => '\u{00b7}',
        "ci" => '\u{25cb}',
        "sq" => '\u{25a1}',
        "lz" => '\u{25ca}',
        "dg" => '\u{2020}',
        "dd" => '\u{2021}',
        "ps" => '\u{00b6}',
        "sc" => '\u{00a7}',
        "lh" => '\u{261c}',
        "rh" => '\u{261e}',
        "co" => '\u{00a9}',
        "rg" => '\u{00ae}',
        "tm" => '\u{2122}',
        "fm" => '\u{2032}',
        "sd" => '\u{2033}',
        "de" => '\u{00b0}',
        "mc" => '\u{00b5}',
        "%0" => '\u{2030}',
        // Miscellaneous punctuation, bars and rules.
        "at" => '@',
        "sh" => '#',
        "or" => '|',
        "ba" => '|',
        "br" => '\u{2502}',
        "bb" => '\u{00a6}',
        "rn" => '\u{203e}',
        // Currency symbols.
        "ct" => '\u{00a2}',
        "Do" => '$',
        "Eu" | "eu" => '\u{20ac}',
        "Po" => '\u{00a3}',
        "Ye" => '\u{00a5}',
        "Cs" => '\u{00a4}',
        // Fractions.
        "12" => '\u{00bd}',
        "14" => '\u{00bc}',
        "34" => '\u{00be}',
        // Ligatures.
        "ff" => '\u{fb00}',
        "fi" => '\u{fb01}',
        "fl" => '\u{fb02}',
        "Fi" => '\u{fb03}',
        "Fl" => '\u{fb04}',
        // Letters with ring or stroke.
        "oA" => '\u{00c5}',
        "oa" => '\u{00e5}',
        "/L" => '\u{0141}',
        "/l" => '\u{0142}',
        "/O" => '\u{00d8}',
        "/o" => '\u{00f8}',
        // More spacing accents.
        "a-" => '\u{00af}',
        "a." => '\u{02d9}',
        "ad" => '\u{00a8}',
        "ah" => '\u{02c7}',
        "a^" => '^',
        // Letters with diaeresis.
        ":a" => '\u{00e4}',
        ":e" => '\u{00eb}',
        ":i" => '\u{00ef}',
        ":o" => '\u{00f6}',
        ":u" => '\u{00fc}',
        ":y" => '\u{00ff}',
        ":A" => '\u{00c4}',
        ":E" => '\u{00cb}',
        ":I" => '\u{00cf}',
        ":O" => '\u{00d6}',
        ":U" => '\u{00dc}',
        ":Y" => '\u{0178}',
        // Letters with acute accent.
        "'a" => '\u{00e1}',
        "'c" => '\u{0107}',
        "'e" => '\u{00e9}',
        "'i" => '\u{00ed}',
        "'o" => '\u{00f3}',
        "'u" => '\u{00fa}',
        "'y" => '\u{00fd}',
        "'A" => '\u{00c1}',
        "'C" => '\u{0106}',
        "'E" => '\u{00c9}',
        "'I" => '\u{00cd}',
        "'O" => '\u{00d3}',
        "'U" => '\u{00da}',
        "'Y" => '\u{00dd}',
        // Letters with grave accent.
        "`a" => '\u{00e0}',
        "`e" => '\u{00e8}',
        "`i" => '\u{00ec}',
        "`o" => '\u{00f2}',
        "`u" => '\u{00f9}',
        "`A" => '\u{00c0}',
        "`E" => '\u{00c8}',
        "`I" => '\u{00cc}',
        "`O" => '\u{00d2}',
        "`U" => '\u{00d9}',
        // Letters with circumflex.
        "^a" => '\u{00e2}',
        "^e" => '\u{00ea}',
        "^i" => '\u{00ee}',
        "^o" => '\u{00f4}',
        "^u" => '\u{00fb}',
        "^A" => '\u{00c2}',
        "^E" => '\u{00ca}',
        "^I" => '\u{00ce}',
        "^O" => '\u{00d4}',
        "^U" => '\u{00db}',
        // Letters with tilde.
        "~a" => '\u{00e3}',
        "~n" => '\u{00f1}',
        "~o" => '\u{00f5}',
        "~A" => '\u{00c3}',
        "~N" => '\u{00d1}',
        "~O" => '\u{00d5}',
        // Letters with cedilla.
        ",c" => '\u{00e7}',
        ",C" => '\u{00c7}',
        // Other Latin letters.
        "ss" => '\u{00df}',
        "ae" => '\u{00e6}',
        "AE" => '\u{00c6}',
        "oe" => '\u{0153}',
        "OE" => '\u{0152}',
        "-D" => '\u{00d0}',
        "Sd" => '\u{00f0}',
        "TP" => '\u{00de}',
        "Tp" => '\u{00fe}',
        // Mathematical operators, relations and symbols.
        "pl" => '+',
        "mi" => '\u{2212}',
        "mu" => '\u{00d7}',
        "di" => '\u{00f7}',
        "+-" => '\u{00b1}',
        "**" => '\u{2217}',
        "c*" => '\u{2297}',
        "c+" => '\u{2295}',
        "<=" => '\u{2264}',
        ">=" => '\u{2265}',
        "!=" => '\u{2260}',
        "==" => '\u{2261}',
        "->" => '\u{2192}',
        "<-" => '\u{2190}',
        "eq" => '=',
        "no" => '\u{00ac}',
        "sr" => '\u{221a}',
        "is" => '\u{222b}',
        "pd" => '\u{2202}',
        "gr" => '\u{2207}',
        "fa" => '\u{2200}',
        "te" => '\u{2203}',
        "if" => '\u{221e}',
        "pt" => '\u{221d}',
        "es" => '\u{2205}',
        "ca" => '\u{2229}',
        "cu" => '\u{222a}',
        "sb" => '\u{2282}',
        "sp" => '\u{2283}',
        "ib" => '\u{2286}',
        "ip" => '\u{2287}',
        "mo" => '\u{2208}',
        "nm" => '\u{2209}',
        "pp" => '\u{22a5}',
        "3d" => '\u{2234}',
        "Ah" => '\u{2135}',
        "Im" => '\u{2111}',
        "Re" => '\u{211c}',
        "wp" => '\u{2118}',
        // Angle brackets, vertical arrow and horizontal line extension.
        "la" => '\u{27e8}',
        "ra" => '\u{27e9}',
        "va" => '\u{2195}',
        "an" => '\u{23af}',
        // Greek lowercase (`ts` is the final sigma).
        "*a" => '\u{03b1}',
        "*b" => '\u{03b2}',
        "*g" => '\u{03b3}',
        "*d" => '\u{03b4}',
        "*e" => '\u{03b5}',
        "*z" => '\u{03b6}',
        "*y" => '\u{03b7}',
        "*h" => '\u{03b8}',
        "*i" => '\u{03b9}',
        "*k" => '\u{03ba}',
        "*l" => '\u{03bb}',
        "*m" => '\u{03bc}',
        "*n" => '\u{03bd}',
        "*c" => '\u{03be}',
        "*o" => '\u{03bf}',
        "*p" => '\u{03c0}',
        "*r" => '\u{03c1}',
        "ts" => '\u{03c2}',
        "*s" => '\u{03c3}',
        "*t" => '\u{03c4}',
        "*u" => '\u{03c5}',
        "*f" => '\u{03c6}',
        "*x" => '\u{03c7}',
        "*q" => '\u{03c8}',
        "*w" => '\u{03c9}',
        // Greek uppercase.
        "*A" => '\u{0391}',
        "*B" => '\u{0392}',
        "*G" => '\u{0393}',
        "*D" => '\u{0394}',
        "*E" => '\u{0395}',
        "*Z" => '\u{0396}',
        "*Y" => '\u{0397}',
        "*H" => '\u{0398}',
        "*I" => '\u{0399}',
        "*K" => '\u{039a}',
        "*L" => '\u{039b}',
        "*M" => '\u{039c}',
        "*N" => '\u{039d}',
        "*C" => '\u{039e}',
        "*O" => '\u{039f}',
        "*P" => '\u{03a0}',
        "*R" => '\u{03a1}',
        "*S" => '\u{03a3}',
        "*T" => '\u{03a4}',
        "*U" => '\u{03a5}',
        "*F" => '\u{03a6}',
        "*X" => '\u{03a7}',
        "*Q" => '\u{03a8}',
        "*W" => '\u{03a9}',
        _ => return None,
    };
    Some(c)
}
2273
2274#[cfg(test)]
2275mod tests {
2276 use super::*;
2277 use carta_core::Extension;
2278
2279 fn read(input: &str) -> Document {
2280 read_with(input, Extensions::from_list(&[Extension::AutoIdentifiers]))
2281 }
2282
2283 fn read_with(input: &str, extensions: Extensions) -> Document {
2284 let mut options = ReaderOptions::default();
2285 options.extensions = extensions;
2286 ManReader.read(input, &options).expect("read")
2287 }
2288
2289 #[test]
2290 fn title_populates_metadata() {
2291 let doc = read(".TH FOO 1 \"2024-01-01\" \"version 1.0\" \"Foo Manual\"\n");
2292 assert_eq!(
2293 doc.meta.get("title"),
2294 Some(&MetaValue::MetaInlines(vec![Inline::Str("FOO".into())]))
2295 );
2296 assert_eq!(
2297 doc.meta.get("section"),
2298 Some(&MetaValue::MetaInlines(vec![Inline::Str("1".into())]))
2299 );
2300 assert_eq!(
2301 doc.meta.get("header"),
2302 Some(&MetaValue::MetaInlines(vec![
2303 Inline::Str("Foo".into()),
2304 Inline::Space,
2305 Inline::Str("Manual".into()),
2306 ]))
2307 );
2308 }
2309
2310 #[test]
2311 fn section_headings_get_identifiers() {
2312 let doc = read(".TH T 1\n.SH NAME\nfoo\n.SS Sub Title\nbar\n");
2313 assert_eq!(
2314 doc.blocks.first(),
2315 Some(&Block::Header(
2316 1,
2317 Box::new(Attr {
2318 id: "name".into(),
2319 ..Attr::default()
2320 }),
2321 vec![Inline::Str("NAME".into())]
2322 ))
2323 );
2324 assert!(matches!(
2325 doc.blocks.get(2),
2326 Some(Block::Header(2, attr, _)) if attr.id == "sub-title"
2327 ));
2328 }
2329
2330 #[test]
2331 fn duplicate_headings_disambiguate() {
2332 let doc = read(".TH T 1\n.SH Foo\nx\n.SH Foo\ny\n");
2333 let ids: Vec<&str> = doc
2334 .blocks
2335 .iter()
2336 .filter_map(|b| match b {
2337 Block::Header(_, attr, _) => Some(attr.id.as_str()),
2338 _ => None,
2339 })
2340 .collect();
2341 assert_eq!(ids, vec!["foo", "foo-1"]);
2342 }
2343
2344 #[test]
2345 fn auto_identifiers_off_leaves_empty_id() {
2346 let doc = read_with(".TH T 1\n.SH Foo Bar\nx\n", Extensions::empty());
2347 assert!(matches!(
2348 doc.blocks.first(),
2349 Some(Block::Header(1, attr, _)) if attr.id.is_empty()
2350 ));
2351 }
2352
2353 #[test]
2354 fn lines_fill_into_one_paragraph() {
2355 let doc = read(".TH T 1\nfirst line\nsecond line\n");
2356 assert_eq!(
2357 doc.blocks.first(),
2358 Some(&Block::Para(vec![
2359 Inline::Str("first".into()),
2360 Inline::Space,
2361 Inline::Str("line".into()),
2362 Inline::Space,
2363 Inline::Str("second".into()),
2364 Inline::Space,
2365 Inline::Str("line".into()),
2366 ]))
2367 );
2368 }
2369
2370 #[test]
2371 fn blank_line_separates_paragraphs() {
2372 let doc = read(".TH T 1\none\n\ntwo\n");
2373 assert_eq!(doc.blocks.len(), 2);
2374 }
2375
2376 #[test]
2377 fn bold_macro_joins_arguments() {
2378 let doc = read(".TH T 1\n.B \"two words\" tail\n");
2379 assert_eq!(
2380 doc.blocks.first(),
2381 Some(&Block::Para(vec![Inline::Strong(vec![
2382 Inline::Str("two".into()),
2383 Inline::Space,
2384 Inline::Str("words".into()),
2385 Inline::Space,
2386 Inline::Str("tail".into()),
2387 ])]))
2388 );
2389 }
2390
2391 #[test]
2392 fn font_macro_nests_an_inner_font_escape() {
2393 let doc = read(".TH T 1\n.B \\-f \\fIfile\\fR tail\n");
2394 assert_eq!(
2395 doc.blocks.first(),
2396 Some(&Block::Para(vec![Inline::Strong(vec![
2397 Inline::Str("-f".into()),
2398 Inline::Space,
2399 Inline::Emph(vec![Inline::Str("file".into())]),
2400 Inline::Space,
2401 Inline::Str("tail".into()),
2402 ])]))
2403 );
2404 }
2405
2406 #[test]
2407 fn alternating_font_arg_wraps_an_inner_escape() {
2408 let doc = read(".TH T 1\n.BR a\\fIx\\fR b\n");
2409 assert_eq!(
2410 doc.blocks.first(),
2411 Some(&Block::Para(vec![
2412 Inline::Strong(vec![
2413 Inline::Str("a".into()),
2414 Inline::Emph(vec![Inline::Str("x".into())]),
2415 ]),
2416 Inline::Str("b".into()),
2417 ]))
2418 );
2419 }
2420
2421 #[test]
2422 fn alternating_fonts_abut_without_space() {
2423 let doc = read(".TH T 1\n.BR bold roman\n");
2424 assert_eq!(
2425 doc.blocks.first(),
2426 Some(&Block::Para(vec![
2427 Inline::Strong(vec![Inline::Str("bold".into())]),
2428 Inline::Str("roman".into()),
2429 ]))
2430 );
2431 }
2432
2433 #[test]
2434 fn inline_font_escape_groups_run() {
2435 let doc = read(".TH T 1\n\\fBtwo words\\fR plain\n");
2436 assert_eq!(
2437 doc.blocks.first(),
2438 Some(&Block::Para(vec![
2439 Inline::Strong(vec![
2440 Inline::Str("two".into()),
2441 Inline::Space,
2442 Inline::Str("words".into()),
2443 ]),
2444 Inline::Space,
2445 Inline::Str("plain".into()),
2446 ]))
2447 );
2448 }
2449
2450 #[test]
2451 fn boundary_space_leaves_the_font_run() {
2452 let doc = read(".TH T 1\n\\fBbold \\fRroman\n");
2453 assert_eq!(
2454 doc.blocks.first(),
2455 Some(&Block::Para(vec![
2456 Inline::Strong(vec![Inline::Str("bold".into())]),
2457 Inline::Space,
2458 Inline::Str("roman".into()),
2459 ]))
2460 );
2461 }
2462
2463 #[test]
2464 fn break_macro_is_a_line_break() {
2465 let doc = read(".TH T 1\nbefore\n.br\nafter\n");
2466 assert_eq!(
2467 doc.blocks.first(),
2468 Some(&Block::Para(vec![
2469 Inline::Str("before".into()),
2470 Inline::LineBreak,
2471 Inline::Str("after".into()),
2472 ]))
2473 );
2474 }
2475
2476 #[test]
2477 fn comment_is_transparent() {
2478 let doc = read(".TH T 1\nvisible\n.\\\" a comment\nstill\n");
2479 assert_eq!(
2480 doc.blocks.first(),
2481 Some(&Block::Para(vec![
2482 Inline::Str("visible".into()),
2483 Inline::Space,
2484 Inline::Str("still".into()),
2485 ]))
2486 );
2487 }
2488
2489 #[test]
2490 fn special_characters_resolve() {
2491 let doc = read(".TH T 1\ndash \\- bullet \\(bu em \\(em\n");
2492 assert_eq!(
2493 doc.blocks.first(),
2494 Some(&Block::Para(vec![
2495 Inline::Str("dash".into()),
2496 Inline::Space,
2497 Inline::Str("-".into()),
2498 Inline::Space,
2499 Inline::Str("bullet".into()),
2500 Inline::Space,
2501 Inline::Str("\u{00b7}".into()),
2502 Inline::Space,
2503 Inline::Str("em".into()),
2504 Inline::Space,
2505 Inline::Str("\u{2014}".into()),
2506 ]))
2507 );
2508 }
2509
2510 #[test]
2511 fn unknown_special_character_is_replacement() {
2512 let doc = read(".TH T 1\nx \\(ZZ y\n");
2513 assert_eq!(
2514 doc.blocks.first(),
2515 Some(&Block::Para(vec![
2516 Inline::Str("x".into()),
2517 Inline::Space,
2518 Inline::Str("\u{fffd}".into()),
2519 Inline::Space,
2520 Inline::Str("y".into()),
2521 ]))
2522 );
2523 }
2524
2525 #[test]
2526 fn unicode_escape_resolves() {
2527 let doc = read(".TH T 1\n\\[u00C9]\n");
2528 assert_eq!(
2529 doc.blocks.first(),
2530 Some(&Block::Para(vec![Inline::Str("\u{00c9}".into())]))
2531 );
2532 }
2533
2534 #[test]
2535 fn tbl_region_becomes_a_table() {
2536 let doc = read(".TH T 1\n.TS\nl r.\nName\tAge\n_\nAda\t36\n.TE\nafter\n");
2537 let Some(Block::Table(table)) = doc.blocks.first() else {
2538 panic!("expected a table");
2539 };
2540 assert_eq!(
2542 table.col_specs,
2543 vec![
2544 ColSpec {
2545 align: Alignment::AlignLeft,
2546 width: ColWidth::ColWidthDefault,
2547 },
2548 ColSpec {
2549 align: Alignment::AlignRight,
2550 width: ColWidth::ColWidthDefault,
2551 },
2552 ]
2553 );
2554 assert_eq!(table.head.rows.len(), 1);
2556 assert_eq!(table.head.rows.first().map(|row| row.cells.len()), Some(2));
2557 assert_eq!(table.bodies.first().map(|body| body.body.len()), Some(1));
2558 assert_eq!(
2559 doc.blocks.get(1),
2560 Some(&Block::Para(vec![Inline::Str("after".into())]))
2561 );
2562 }
2563
2564 #[test]
2565 fn tbl_without_header_rule_puts_every_row_in_the_body() {
2566 let doc = read(".TH T 1\n.TS\nc c.\nName\tAge\nAda\t36\n.TE\n");
2567 let Some(Block::Table(table)) = doc.blocks.first() else {
2568 panic!("expected a table");
2569 };
2570 assert!(table.head.rows.is_empty());
2571 assert_eq!(table.bodies.first().map(|body| body.body.len()), Some(2));
2572 }
2573
2574 #[test]
2575 fn malformed_tbl_region_yields_no_block() {
2576 let doc = read(".TS");
2577 assert!(doc.blocks.is_empty());
2578 }
2579
2580 #[test]
2581 fn tagged_paragraphs_become_a_definition_list() {
2582 let doc = read(".TH T 1\n.TP\n.B \\-v\nVerbose mode.\n.TP\n.B \\-f\nUse a file.\n");
2583 let Some(Block::DefinitionList(items)) = doc.blocks.first() else {
2584 panic!("expected a definition list");
2585 };
2586 assert_eq!(items.len(), 2);
2587 assert_eq!(
2588 items.first().map(|(term, _)| term.clone()),
2589 Some(vec![Inline::Strong(vec![Inline::Str("-v".into())])])
2590 );
2591 }
2592
2593 #[test]
2594 fn bullet_indented_paragraphs_become_a_bullet_list() {
2595 let doc = read(".TH T 1\n.IP \\(bu 2\none\n.IP \\(bu 2\ntwo\n");
2596 let Some(Block::BulletList(items)) = doc.blocks.first() else {
2597 panic!("expected a bullet list");
2598 };
2599 assert_eq!(items.len(), 2);
2600 }
2601
2602 #[test]
2603 fn numbered_indented_paragraphs_become_an_ordered_list() {
2604 let doc = read(".TH T 1\n.IP 3. 4\nthree\n.IP 4. 4\nfour\n");
2605 assert_eq!(
2606 doc.blocks.first(),
2607 Some(&Block::OrderedList(
2608 ListAttributes {
2609 start: 3,
2610 style: ListNumberStyle::Decimal,
2611 delim: ListNumberDelim::Period,
2612 },
2613 vec![
2614 vec![Block::Para(vec![Inline::Str("three".into())])],
2615 vec![Block::Para(vec![Inline::Str("four".into())])],
2616 ]
2617 ))
2618 );
2619 }
2620
2621 #[test]
2622 fn roman_marker_is_lower_roman() {
2623 assert!(matches!(
2624 parse_enumerator("iv."),
2625 Some(ListAttributes {
2626 start: 4,
2627 style: ListNumberStyle::LowerRoman,
2628 delim: ListNumberDelim::Period,
2629 })
2630 ));
2631 }
2632
2633 #[test]
2634 fn bare_letter_marker_uses_its_position() {
2635 assert!(matches!(
2636 parse_enumerator("o"),
2637 Some(ListAttributes {
2638 start: 15,
2639 style: ListNumberStyle::LowerAlpha,
2640 delim: ListNumberDelim::DefaultDelim,
2641 })
2642 ));
2643 }
2644
2645 #[test]
2646 fn unmarked_indented_paragraph_is_an_inset() {
2647 let doc = read(".TH T 1\n.IP\nplain indented\n");
2648 assert!(matches!(doc.blocks.first(), Some(Block::BlockQuote(_))));
2649 }
2650
2651 #[test]
2652 fn relative_inset_becomes_a_block_quote() {
2653 let doc = read(".TH T 1\n.RS\ninside\n.RE\nafter\n");
2654 assert_eq!(
2655 doc.blocks.first(),
2656 Some(&Block::BlockQuote(vec![Block::Para(vec![Inline::Str(
2657 "inside".into()
2658 )])]))
2659 );
2660 assert_eq!(
2661 doc.blocks.get(1),
2662 Some(&Block::Para(vec![Inline::Str("after".into())]))
2663 );
2664 }
2665
2666 #[test]
2667 fn nested_insets_nest_block_quotes() {
2668 let doc = read(".TH T 1\n.RS\nouter\n.RS\ninner\n.RE\n.RE\n");
2669 assert!(matches!(
2670 doc.blocks.first(),
2671 Some(Block::BlockQuote(inner)) if inner.iter().any(|b| matches!(b, Block::BlockQuote(_)))
2672 ));
2673 }
2674
2675 #[test]
2676 fn no_fill_region_becomes_a_code_block() {
2677 let doc = read(".TH T 1\n.nf\nline one\n indented\n.fi\n");
2678 assert_eq!(
2679 doc.blocks.first(),
2680 Some(&Block::CodeBlock(
2681 Box::default(),
2682 "line one\n indented".into()
2683 ))
2684 );
2685 }
2686
2687 #[test]
2688 fn example_region_becomes_a_code_block() {
2689 let doc = read(".TH T 1\n.EX\n\\fBcode\\fR \\- here\n.EE\n");
2690 assert_eq!(
2691 doc.blocks.first(),
2692 Some(&Block::CodeBlock(Box::default(), "code - here".into()))
2693 );
2694 }
2695
2696 #[test]
2697 fn uri_macro_becomes_a_link() {
2698 let doc = read(".TH T 1\n.UR https://example.com\nthe text\n.UE\n");
2699 assert_eq!(
2700 doc.blocks.first(),
2701 Some(&Block::Para(vec![Inline::Link(
2702 Box::default(),
2703 vec![
2704 Inline::Str("the".into()),
2705 Inline::Space,
2706 Inline::Str("text".into()),
2707 ],
2708 Box::new(Target {
2709 url: "https://example.com".into(),
2710 title: carta_ast::Text::default(),
2711 }),
2712 )]))
2713 );
2714 }
2715
2716 #[test]
2717 fn mail_macro_uses_mailto() {
2718 let doc = read(".TH T 1\n.MT user@example.com\nwrite me\n.ME\n");
2719 let Some(Block::Para(inlines)) = doc.blocks.first() else {
2720 panic!("expected a paragraph");
2721 };
2722 assert!(matches!(
2723 inlines.first(),
2724 Some(Inline::Link(_, _, target)) if target.url == "mailto:user@example.com"
2725 ));
2726 }
2727
2728 #[test]
2729 fn link_trailing_text_attaches_without_space() {
2730 let doc = read(".TH T 1\nsee\n.UR https://x.org\nhere\n.UE .\nnext\n");
2731 let Some(Block::Para(inlines)) = doc.blocks.first() else {
2732 panic!("expected a paragraph");
2733 };
2734 let link_index = inlines
2736 .iter()
2737 .position(|i| matches!(i, Inline::Link(..)))
2738 .expect("link present");
2739 assert_eq!(inlines.get(link_index + 1), Some(&Inline::Str(".".into())));
2740 }
2741
2742 #[test]
2743 fn unknown_macro_breaks_the_paragraph() {
2744 let doc = read(".TH T 1\nbefore\n.XYZ args\nafter\n");
2745 assert_eq!(doc.blocks.len(), 2);
2746 }
2747
2748 #[test]
2749 fn defined_string_interpolates_and_rescans_its_escapes() {
2750 let doc = read(".TH T 1\n.ds B \\fBbold\\fP\nx \\*B y\n");
2751 assert_eq!(
2752 doc.blocks.first(),
2753 Some(&Block::Para(vec![
2754 Inline::Str("x".into()),
2755 Inline::Space,
2756 Inline::Strong(vec![Inline::Str("bold".into())]),
2757 Inline::Space,
2758 Inline::Str("y".into()),
2759 ]))
2760 );
2761 }
2762
2763 #[test]
2764 fn predefined_strings_resolve() {
2765 let doc = read(".TH T 1\n\\*(lq x \\*(rq \\*(Tm \\*R\n");
2766 assert_eq!(
2767 doc.blocks.first(),
2768 Some(&Block::Para(vec![
2769 Inline::Str("\u{201c}".into()),
2770 Inline::Space,
2771 Inline::Str("x".into()),
2772 Inline::Space,
2773 Inline::Str("\u{201d}".into()),
2774 Inline::Space,
2775 Inline::Str("\u{2122}".into()),
2776 Inline::Space,
2777 Inline::Str("\u{00ae}".into()),
2778 ]))
2779 );
2780 }
2781
2782 #[test]
2783 fn accented_special_characters_resolve() {
2784 let doc = read(".TH T 1\n\\(:a\\(ss\\('e\\(la\\(,c\n");
2785 assert_eq!(
2786 doc.blocks.first(),
2787 Some(&Block::Para(vec![Inline::Str(
2788 "\u{e4}\u{df}\u{e9}\u{27e8}\u{e7}".into()
2789 )]))
2790 );
2791 }
2792
2793 #[test]
2794 fn tab_escape_becomes_a_space() {
2795 let doc = read(".TH T 1\na\\tb\n");
2796 assert_eq!(
2797 doc.blocks.first(),
2798 Some(&Block::Para(vec![
2799 Inline::Str("a".into()),
2800 Inline::Space,
2801 Inline::Str("b".into()),
2802 ]))
2803 );
2804 }
2805
2806 #[test]
2807 fn continuation_escape_is_dropped() {
2808 let doc = read(".TH T 1\nabc\\c\ndef\n");
2810 assert_eq!(
2811 doc.blocks.first(),
2812 Some(&Block::Para(vec![
2813 Inline::Str("abc".into()),
2814 Inline::Space,
2815 Inline::Str("def".into()),
2816 ]))
2817 );
2818 }
2819
2820 #[test]
2821 fn zero_width_and_motion_escapes_drop_their_glyphs() {
2822 let doc = read(".TH T 1\na\\zbc up\\udown\\d mark\\kx end\n");
2824 assert_eq!(
2825 doc.blocks.first(),
2826 Some(&Block::Para(vec![
2827 Inline::Str("ac".into()),
2828 Inline::Space,
2829 Inline::Str("updown".into()),
2830 Inline::Space,
2831 Inline::Str("mark".into()),
2832 Inline::Space,
2833 Inline::Str("end".into()),
2834 ]))
2835 );
2836 }
2837
2838 #[test]
2839 fn trailing_backslash_joins_the_next_line_without_a_space() {
2840 let doc = read(".TH T 1\nfoo\\\nbar\n");
2841 assert_eq!(
2842 doc.blocks.first(),
2843 Some(&Block::Para(vec![Inline::Str("foobar".into())]))
2844 );
2845 }
2846
2847 #[test]
2848 fn supplementary_tag_joins_terms_with_a_line_break() {
2849 let doc = read(".TH T 1\n.TP\n.B \\-a\n.TQ\n.B \\-b\nbody.\n");
2850 let Some(Block::DefinitionList(items)) = doc.blocks.first() else {
2851 panic!("expected a definition list");
2852 };
2853 assert_eq!(
2854 items.first().map(|(term, _)| term.clone()),
2855 Some(vec![
2856 Inline::Strong(vec![Inline::Str("-a".into())]),
2857 Inline::LineBreak,
2858 Inline::Strong(vec![Inline::Str("-b".into())]),
2859 ])
2860 );
2861 }
2862
2863 #[test]
2864 fn request_in_link_label_aborts_the_link() {
2865 let doc = read(".TH T 1\nbefore\n.UR u\n.B bold\n.UE after\nnext\n");
2868 assert_eq!(
2869 doc.blocks,
2870 vec![
2871 Block::Para(vec![Inline::Str("before".into())]),
2872 Block::Para(vec![Inline::Strong(vec![Inline::Str("bold".into())])]),
2873 Block::Para(vec![Inline::Str("next".into())]),
2874 ]
2875 );
2876 }
2877
2878 #[test]
2879 fn link_without_a_terminator_emits_its_label() {
2880 let doc = read(".TH T 1\n.UR u\nlabel\n");
2881 assert_eq!(
2882 doc.blocks.first(),
2883 Some(&Block::Para(vec![Inline::Str("label".into())]))
2884 );
2885 }
2886
2887 #[test]
2888 fn whitespace_only_line_does_not_break_the_paragraph() {
2889 let doc = read(".TH T 1\none\n \ntwo\n");
2890 assert_eq!(
2891 doc.blocks.first(),
2892 Some(&Block::Para(vec![
2893 Inline::Str("one".into()),
2894 Inline::Space,
2895 Inline::Str("two".into()),
2896 ]))
2897 );
2898 assert_eq!(doc.blocks.len(), 1);
2899 }
2900
2901 #[test]
2902 fn lone_whitespace_line_is_an_empty_paragraph() {
2903 let doc = read(".TH T 1\n \n");
2904 assert_eq!(doc.blocks.first(), Some(&Block::Para(Vec::new())));
2905 }
2906
2907 #[test]
2908 fn tagged_paragraph_with_no_body_becomes_a_paragraph() {
2909 let doc = read(".TH T 1\n.TP\n.B \\-x\n");
2910 assert_eq!(
2911 doc.blocks.first(),
2912 Some(&Block::Para(vec![Inline::Strong(vec![Inline::Str(
2913 "-x".into()
2914 )])]))
2915 );
2916 }
2917
2918 #[test]
2919 fn empty_tagged_paragraph_nests_the_following_items() {
2920 let doc = read(".TH T 1\n.TP\n.B \\-a\n.TP\n.B \\-b\nbody.\n");
2921 let Some(Block::DefinitionList(items)) = doc.blocks.first() else {
2922 panic!("expected a definition list");
2923 };
2924 assert_eq!(items.len(), 1);
2925 let nested = items
2926 .first()
2927 .and_then(|(_, bodies)| bodies.first())
2928 .and_then(|blocks| blocks.first());
2929 assert!(matches!(nested, Some(Block::DefinitionList(_))));
2930 }
2931
2932 #[test]
2933 fn marked_item_with_no_body_keeps_an_empty_paragraph() {
2934 let doc = read(".TH T 1\n.IP \\(bu\n.IP \\(bu\nsecond.\n");
2935 let Some(Block::BulletList(items)) = doc.blocks.first() else {
2936 panic!("expected a bullet list");
2937 };
2938 assert_eq!(items.first(), Some(&vec![Block::Para(Vec::new())]));
2939 }
2940
2941 #[test]
2942 fn unmarked_item_with_no_body_contributes_nothing() {
2943 let doc = read(".TH T 1\n.IP\n");
2944 assert!(doc.blocks.is_empty());
2945 }
2946
2947 #[test]
2948 fn ascii_identifiers_fold_an_accented_heading() {
2949 let doc = read_with(
2950 ".TH T 1\n.SH Café\nx\n",
2951 Extensions::from_list(&[Extension::AutoIdentifiers, Extension::AsciiIdentifiers]),
2952 );
2953 assert!(matches!(
2954 doc.blocks.first(),
2955 Some(Block::Header(1, attr, _)) if attr.id == "cafe"
2956 ));
2957 }
2958
2959 #[test]
2960 fn constant_width_font_escape_becomes_code() {
2961 let doc = read(".TH T 1\nplain \\f(CWmono\\fP back\n");
2962 assert_eq!(
2963 doc.blocks.first(),
2964 Some(&Block::Para(vec![
2965 Inline::Str("plain".into()),
2966 Inline::Space,
2967 Inline::Code(Box::default(), "mono".into()),
2968 Inline::Space,
2969 Inline::Str("back".into()),
2970 ]))
2971 );
2972 }
2973
2974 #[test]
2975 fn constant_width_bold_font_wraps_code_in_strong() {
2976 let doc = read(".TH T 1\n\\f(CBmono\\fP\n");
2977 assert_eq!(
2978 doc.blocks.first(),
2979 Some(&Block::Para(vec![Inline::Strong(vec![Inline::Code(
2980 Box::default(),
2981 "mono".into()
2982 )])]))
2983 );
2984 }
2985
2986 #[test]
2987 fn user_macro_substitutes_call_arguments() {
2988 let doc = read(".TH T 1\n.de GREET\nHello \\$1 and \\$2.\n..\n.GREET Alice Bob\n");
2989 assert_eq!(
2990 doc.blocks.first(),
2991 Some(&Block::Para(vec![
2992 Inline::Str("Hello".into()),
2993 Inline::Space,
2994 Inline::Str("Alice".into()),
2995 Inline::Space,
2996 Inline::Str("and".into()),
2997 Inline::Space,
2998 Inline::Str("Bob.".into()),
2999 ]))
3000 );
3001 }
3002
3003 #[test]
3004 fn multi_line_macro_expansion_fills_like_inline_text() {
3005 let inline = read(".TH T 1\nfirst line\nsecond line\n");
3006 let via_macro = read(".TH T 1\n.de M\nfirst line\nsecond line\n..\n.M\n");
3007 assert_eq!(inline.blocks, via_macro.blocks);
3008 }
3009
3010 #[test]
3011 fn nested_macro_call_expands_in_place_preserving_order() {
3012 let doc =
3013 read(".TH T 1\n.de INNER\nmiddle\n..\n.de OUTER\nbefore\n.INNER\nafter\n..\n.OUTER\n");
3014 assert_eq!(
3015 doc.blocks.first(),
3016 Some(&Block::Para(vec![
3017 Inline::Str("before".into()),
3018 Inline::Space,
3019 Inline::Str("middle".into()),
3020 Inline::Space,
3021 Inline::Str("after".into()),
3022 ]))
3023 );
3024 }
3025
3026 #[test]
3027 fn macro_expansion_seam_keeps_base_lines_in_order() {
3028 let doc = read(".TH T 1\n.de M\nexpanded\n..\n.M\nbase line\n");
3029 assert_eq!(
3030 doc.blocks.first(),
3031 Some(&Block::Para(vec![
3032 Inline::Str("expanded".into()),
3033 Inline::Space,
3034 Inline::Str("base".into()),
3035 Inline::Space,
3036 Inline::Str("line".into()),
3037 ]))
3038 );
3039 }
3040
3041 #[test]
3042 fn conditional_inside_macro_expansion_reprocesses_the_queued_line() {
3043 let doc = read(".TH T 1\n.de M\n.ie n kept\n.el dropped\n..\n.M\nbase line\n");
3046 assert_eq!(
3047 doc.blocks.first(),
3048 Some(&Block::Para(vec![
3049 Inline::Str("kept".into()),
3050 Inline::Space,
3051 Inline::Str("base".into()),
3052 Inline::Space,
3053 Inline::Str("line".into()),
3054 ]))
3055 );
3056 }
3057
3058 #[test]
3059 fn link_label_spanning_macro_expansion_and_base_document_is_recognized() {
3060 let doc =
3063 read(".TH T 1\n.de LABEL\n.UR https://example.com\nfirst\n..\n.LABEL\nsecond\n.UE\n");
3064 let Some(Block::Para(inlines)) = doc.blocks.first() else {
3065 panic!("expected a paragraph");
3066 };
3067 assert!(matches!(
3068 inlines.first(),
3069 Some(Inline::Link(_, _, target)) if target.url == "https://example.com"
3070 ));
3071 }
3072
3073 #[test]
3074 fn doubled_backslash_argument_reference_reduces_like_a_single_one() {
3075 let single = read(".TH T 1\n.de M\nvalue \\$1\n..\n.M x\n");
3076 let doubled = read(".TH T 1\n.de M\nvalue \\\\$1\n..\n.M x\n");
3077 assert_eq!(single.blocks, doubled.blocks);
3078 assert_eq!(
3079 single.blocks.first(),
3080 Some(&Block::Para(vec![
3081 Inline::Str("value".into()),
3082 Inline::Space,
3083 Inline::Str("x".into()),
3084 ]))
3085 );
3086 }
3087
3088 #[test]
3089 fn copy_mode_reduces_an_escaped_backslash_before_an_escape() {
3090 assert_eq!(reduce_copy_mode("x\\\\(buy"), "x\\(buy");
3091 assert_eq!(reduce_copy_mode("plain text"), "plain text");
3092 }
3093
3094 #[test]
3095 fn font_macro_with_an_explicit_empty_argument_keeps_its_wrapper() {
3096 let doc = read(".TH T 1\nbefore\n.B \"\"\nafter\n");
3097 assert_eq!(
3098 doc.blocks.first(),
3099 Some(&Block::Para(vec![
3100 Inline::Str("before".into()),
3101 Inline::Space,
3102 Inline::Strong(Vec::new()),
3103 Inline::Space,
3104 Inline::Str("after".into()),
3105 ]))
3106 );
3107 }
3108
3109 #[test]
3110 fn font_macro_with_no_argument_takes_the_next_line() {
3111 let doc = read(".TH T 1\nbefore\n.I\nafter\n");
3112 assert_eq!(
3113 doc.blocks.first(),
3114 Some(&Block::Para(vec![
3115 Inline::Str("before".into()),
3116 Inline::Space,
3117 Inline::Emph(vec![Inline::Str("after".into())]),
3118 ]))
3119 );
3120 }
3121
3122 #[test]
3123 fn option_synopsis_brackets_a_bold_option_name() {
3124 let doc = read(".TH T 1\n.OP \\-o file\n");
3125 assert_eq!(
3126 doc.blocks.first(),
3127 Some(&Block::Para(vec![
3128 Inline::Str("[".into()),
3129 Inline::Space,
3130 Inline::Strong(vec![Inline::Str("-o".into())]),
3131 Inline::Space,
3132 Inline::Str("file".into()),
3133 Inline::Space,
3134 Inline::Str("]".into()),
3135 ]))
3136 );
3137 }
3138
3139 #[test]
3140 fn table_with_a_horizontal_span_degrades_to_a_placeholder() {
3141 let doc = read(".TH T 1\n.TS\nl s l.\nWide\t\tEnd\none\ttwo\tthree\n.TE\n");
3142 assert_eq!(
3143 doc.blocks.first(),
3144 Some(&Block::Para(vec![Inline::Str("TABLE".into())]))
3145 );
3146 }
3147
3148 #[test]
3149 fn table_text_block_joins_its_lines() {
3150 let doc = read(".TH T 1\n.TS\nl l.\nName\tT{\nA long\ndescription\nT}\nLeft\tRight\n.TE\n");
3151 let Some(Block::Table(table)) = doc.blocks.first() else {
3152 panic!("expected a table");
3153 };
3154 let cell_text = format!("{table:?}");
3156 assert!(cell_text.contains("long"));
3157 assert!(cell_text.contains("description"));
3158 }
3159
3160 #[test]
3161 fn east_asian_line_breaks_is_accepted_and_inert() {
3162 let input = ".TH T 1\n.SH H\nplain filled text\n";
3163 let base = read(input);
3164 let with = read_with(
3165 input,
3166 Extensions::from_list(&[Extension::AutoIdentifiers, Extension::EastAsianLineBreaks]),
3167 );
3168 assert_eq!(base.blocks, with.blocks);
3169 }
3170}