// asciidork_parser/tasks/parse_inlines/inline_utils.rs
use lazy_static::lazy_static;
use regex::Regex;

use crate::internal::*;
use crate::variants::token::*;

impl<'arena> Parser<'arena> {
  /// Rebuilds a macro target from a preprocessed passthrough placeholder.
  ///
  /// When `line` begins with a `PreprocPassthru` token immediately followed by
  /// an `OpenBracket`, both tokens are removed from the line, the stored
  /// passthrough the placeholder refers to is taken out of `self.ctx.passthrus`,
  /// and its plain text is returned as a `SourceString` located at the
  /// placeholder's source location. Otherwise returns `None`.
  ///
  /// # Panics
  ///
  /// Panics if bytes `1..6` of the placeholder lexeme do not parse as a
  /// `usize`, if that index is out of bounds for `self.ctx.passthrus`, or if
  /// the passthrough stored at that index has already been taken.
  pub fn macro_target_from_passthru(
    &mut self,
    line: &mut Line<'arena>,
  ) -> Option<SourceString<'arena>> {
    if !line.starts_with_seq(&[Kind(PreprocPassthru), Kind(OpenBracket)]) {
      return None;
    }

    let placeholder = line.consume_current().unwrap();
    line.discard(1); // drop the open bracket that follows the placeholder

    // the passthrough index is encoded in bytes 1..6 of the placeholder lexeme
    let passthru_idx: usize = placeholder.lexeme[1..6].parse().unwrap();
    let capacity = placeholder.loc.size() as usize;
    let mut target = BumpString::with_capacity_in(capacity, self.bump);

    let stored = self.ctx.passthrus[passthru_idx].take().unwrap();
    for chunk in stored.plain_text().iter() {
      target.push_str(chunk);
    }
    Some(SourceString::new(target, placeholder.loc))
  }
}

/// Accumulator pairing the inline nodes built so far with a collector for
/// text that has not yet been turned into a node.
#[derive(Debug)]
pub struct Accum<'arena> {
  /// Inline nodes collected so far.
  pub inlines: InlineNodes<'arena>,
  /// Pending text; `Accum::commit` flushes it into `inlines`.
  pub text: CollectText<'arena>,
}

impl<'arena> Accum<'arena> {
  /// Flushes the pending text collector into `inlines`.
  pub fn commit(&mut self) {
    self.text.commit_inlines(&mut self.inlines);
  }

  /// Commits pending text, appends `node` at `loc`, and repositions the text
  /// collector at the clamped end of `loc`.
  pub fn push_node(&mut self, node: Inline<'arena>, loc: SourceLocation) {
    self.commit();
    let wrapped = InlineNode::new(node, loc);
    self.inlines.push(wrapped);
    self.text.loc = loc.clamp_end();
  }

  /// Removes the most recently pushed inline node, if there is one.
  pub fn pop_node(&mut self) {
    self.inlines.pop();
  }

  /// Appends an `Inline::Newline` node when `lines` is not empty.
  ///
  /// Pending text is committed first, then the text location's end is advanced
  /// by one and that location is used for the newline node.
  pub fn maybe_push_joining_newline(&mut self, lines: &ContiguousLines<'arena>) {
    if lines.is_empty() {
      return;
    }
    self.commit();
    self.text.loc.end += 1;
    let newline_loc = self.text.loc;
    self.push_node(Inline::Newline, newline_loc);
  }

  /// Consumes the accumulator and returns its inline nodes with trailing line
  /// comments stripped.
  ///
  /// For as long as a trailing line comment can be removed, the trailing
  /// newline is removed as well, followed by a trailing `Inline::Discarded`
  /// node if one is then last.
  pub fn trimmed_inlines(mut self) -> InlineNodes<'arena> {
    while self.inlines.remove_trailing_line_comment() {
      self.inlines.remove_trailing_newline();
      let last_content = self.inlines.last().map(|node| &node.content);
      let ends_with_discarded = matches!(last_content, Some(Inline::Discarded));
      if ends_with_discarded {
        self.inlines.pop();
      }
    }
    self.inlines
  }
}

impl Substitutions {
  /// Builds the substitution set named by the target of a `pass:` macro.
  ///
  /// `target` is a comma-separated list of substitution names or their
  /// one-letter shorthands. Unrecognized (and empty) entries are ignored, so an
  /// empty target yields no substitutions. `n`/`normal` replaces whatever has
  /// been gathered so far with the normal substitution set.
  ///
  /// https://docs.asciidoctor.org/asciidoc/latest/pass/pass-macro/#custom-substitutions
  pub fn from_pass_macro_target(target: BumpString) -> Self {
    let mut subs = Substitutions::none();
    for value in target.split(',') {
      let sub = match value {
        "n" | "normal" => {
          subs = Substitutions::normal();
          continue;
        }
        "c" | "specialchars" => Subs::SpecialChars,
        "a" | "attributes" => Subs::AttrRefs,
        "r" | "replacements" => Subs::CharReplacement,
        "m" | "macros" => Subs::Macros,
        "q" | "quotes" => Subs::InlineFormatting,
        "v" | "verbatim" => Subs::SpecialChars,
        // NB: rx docs say | "post replacements", but doesn't work
        "p" => Subs::PostReplacement,
        _ => continue,
      };
      subs.insert(sub);
    }
    subs
  }

  /// Chooses the substitutions for a plus-delimited passthrough token: a token
  /// of length 3 gets no substitutions, any other length gets only special
  /// character substitution.
  pub fn from_pass_plus_token(token: &Token) -> Self {
    match token.len() {
      3 => Substitutions::none(),
      _ => Substitutions::only_special_chars(),
    }
  }
}

/// Sets `loc.end` to the end of the last node in `nodes` plus `adding`.
///
/// When `nodes` is empty, the current `loc.end` is used as the base instead.
pub fn extend(loc: &mut SourceLocation, nodes: &[InlineNode<'_>], adding: usize) {
  let base_end = match nodes.last() {
    Some(last_node) => last_node.loc.end,
    None => loc.end,
  };
  loc.end = base_end + adding as u32;
}

/// Reports whether `token` opens a constrained span delimited by `stop_tokens`.
///
/// `token` must be of the kind of the last stop token, and either the current
/// `line` or the remaining `lines` must terminate the constrained sequence.
///
/// # Panics
///
/// Panics if `stop_tokens` is empty.
pub fn starts_constrained(
  stop_tokens: &[TokenSpec],
  token: &Token,
  line: &Line,
  lines: &mut ContiguousLines,
) -> bool {
  debug_assert!(!stop_tokens.is_empty());
  let closing = stop_tokens.last().unwrap();
  if !token.is(closing.token_kind()) {
    return false;
  }
  // only consult the following lines when the current line has no terminator
  line.terminates_constrained(stop_tokens) || lines.terminates_constrained(stop_tokens)
}

/// Reports whether `token` opens an unconstrained span delimited by
/// `stop_tokens`.
///
/// `token` must be of the kind of the first stop token; when there are two or
/// more stop tokens, the line's current token must be of the kind of the
/// second; and the full stop sequence must appear in `line` or `lines`.
///
/// # Panics
///
/// Panics if `stop_tokens` is empty.
pub fn starts_unconstrained(
  stop_tokens: &[TokenSpec],
  token: &Token,
  line: &Line,
  lines: &ContiguousLines,
) -> bool {
  debug_assert!(!stop_tokens.is_empty());
  if !token.is(stop_tokens[0].token_kind()) {
    return false;
  }
  if stop_tokens.len() >= 2 && !line.current_is(stop_tokens[1].token_kind()) {
    return false;
  }
  contains_seq(stop_tokens, line, lines)
}

/// Reports whether the token sequence `seq` occurs in `line`, or failing that,
/// in `lines`.
pub fn contains_seq(seq: &[TokenSpec], line: &Line, lines: &ContiguousLines) -> bool {
  let in_current_line = line.contains_seq(seq);
  in_current_line || lines.contains_seq(seq)
}

/// Reports whether a token of `kind` with length `len` occurs in `line`, or
/// failing that, in `lines`.
pub fn contains_len(kind: TokenKind, len: usize, line: &Line, lines: &ContiguousLines) -> bool {
  let in_current_line = line.contains_len(kind, len);
  in_current_line || lines.contains_len(kind, len)
}

/// Finalizes the source location of a just-parsed macro and repositions the
/// text collector after it.
///
/// `loc` is extended to the line's current location when the line has one,
/// otherwise to `line_end`. `text.loc` is then set to the clamped end of `loc`.
/// In the former case `loc.end` is finally pulled back by one.
pub fn finish_macro<'arena>(
  line: &Line<'arena>,
  loc: &mut SourceLocation,
  line_end: SourceLocation,
  text: &mut CollectText<'arena>,
) {
  let line_has_location = match line.loc() {
    Some(cur_location) => {
      loc.extend(cur_location);
      true
    }
    None => {
      loc.extend(line_end);
      false
    }
  };
  text.loc = loc.clamp_end();
  if line_has_location {
    loc.end -= 1; // parsing attr list moves us one past end of macro
  }
}

lazy_static! {
  /// Matches an email address at the start of the haystack.
  ///
  /// The pattern is anchored with `^` only (no end anchor) and has no
  /// case-insensitive flag, so only lowercase letters match.
  ///
  /// Capture groups:
  /// - 1: the local part — starts and ends with one of `[a-z0-9_+]`, with dots
  ///   additionally allowed in between
  /// - 3: the domain — `[a-z0-9]+` labels joined by a single `-` or `.`,
  ///   ending in a dot and 2 to 6 lowercase letters
  ///
  /// Groups 2 and 4 are the nested repetition groups inside 1 and 3.
  pub static ref EMAIL_RE: Regex = Regex::new(
    r"^([a-z0-9_+]([a-z0-9_+.]*[a-z0-9_+])?)@([a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,6})"
  )
  .unwrap();
}