smt_scope/formatter/
defns.rs

1use std::borrow::Cow;
2
3use fxhash::FxHashMap;
4
5use crate::NonMaxU32;
6
7use super::{ConversionError, DeParseTrait, FallbackParseError, TdcError};
8
/// Control character of the formatter syntax.
// NOTE(review): the parser that consumes this is not in this file — presumably
// it introduces a special sequence in formatter strings; confirm there.
pub const CONTROL_CHARACTER: char = '$';
/// Separator character of the formatter syntax.
// NOTE(review): presumably separates the parts of a control sequence; confirm
// against the parser.
pub const SEPARATOR_CHARACTER: char = '|';
/// Bind power used when none is specified, e.g. by [`TermDisplay::empty`].
pub const DEFAULT_BIND_POWER: BindPower = 0;
12
/// A collection of [`TermDisplay`] rules plus a fallback formatter, used to
/// pick the [`Formatter`] for a given string (see
/// [`TermDisplayContext::match_str`]).
#[derive(Default, Debug, Clone)]
pub struct TermDisplayContext {
    /// Rules with a `MatcherKind::Exact` matcher, keyed by the exact string
    /// and the matcher's optional `children` value.
    string_matchers: FxHashMap<(Cow<'static, str>, Option<NonMaxU32>), TermDisplay>,
    /// Rules with a `MatcherKind::Regex` matcher, in priority order (earlier
    /// entries win).
    regex_matchers: Vec<TermDisplay>,
    /// Regex set built from `regex_matchers`, in the same order, so that a
    /// match index can be used directly as an index into `regex_matchers`.
    /// Rebuilt by `calculate_regex_set` whenever `regex_matchers` changes.
    regex_set: regex::RegexSet,
    /// Formatter used by `match_str` when no rule matches.
    fallback: FallbackFormatter,
}
20
/// Borrowed view of the persistent parts of a [`TermDisplayContext`], in the
/// order: exact-string rules, regex rules, fallback formatter. The derived
/// `regex_set` is not included; it is recomputed by `from_parts`.
pub type TermDisplayContextParts<'a> = (
    &'a FxHashMap<(Cow<'static, str>, Option<NonMaxU32>), TermDisplay>,
    &'a Vec<TermDisplay>,
    &'a FallbackFormatter,
);
26
27impl FromIterator<TermDisplay> for Result<TermDisplayContext, TdcError> {
28    fn from_iter<T: IntoIterator<Item = TermDisplay>>(iter: T) -> Self {
29        let mut this = TermDisplayContext::default();
30        this.append(iter.into_iter())?;
31        Ok(this)
32    }
33}
34
35impl TermDisplayContext {
36    pub fn new(fallback: Formatter) -> Self {
37        Self {
38            string_matchers: Default::default(),
39            regex_matchers: Default::default(),
40            regex_set: Default::default(),
41            fallback: FallbackFormatter(fallback),
42        }
43    }
44
45    pub fn is_empty(&self) -> bool {
46        self.string_matchers.is_empty() && self.regex_matchers.is_empty()
47    }
48    pub fn all(&self) -> impl Iterator<Item = &TermDisplay> {
49        self.string_matchers.values().chain(&self.regex_matchers)
50    }
51    pub fn fallback(&self) -> &FallbackFormatter {
52        &self.fallback
53    }
54
55    pub fn set_fallback(&mut self, formatter: FallbackFormatter) {
56        self.fallback = formatter
57    }
58
59    /// Appends multiple `TermDisplay` at once. This is more efficient than
60    /// repeatedly calling `push` as matching set for all regexes is calculated
61    /// only once.
62    pub fn append(&mut self, terms: impl Iterator<Item = TermDisplay>) -> Result<(), TdcError> {
63        let mut added_regex_matcher = false;
64        for term in terms {
65            added_regex_matcher |= self.push_inner(term)?;
66        }
67        if added_regex_matcher {
68            self.calculate_regex_set();
69        }
70        Ok(())
71    }
72
73    pub fn push(&mut self, term: TermDisplay) -> Result<(), TdcError> {
74        if self.push_inner(term)? {
75            self.calculate_regex_set();
76        }
77        Ok(())
78    }
79
80    pub fn remove(&mut self, matcher: &Matcher) -> Option<TermDisplay> {
81        match &matcher.kind {
82            MatcherKind::Exact(s) => {
83                // SAFETY: though the lifetime is 'static in terms of the
84                // compiler, the actual lifetime will end at the end of the
85                // block.
86                let s = unsafe { &*(s.as_str() as *const _) };
87                self.string_matchers
88                    .remove(&(Cow::Borrowed(s), matcher.children))
89            }
90            MatcherKind::Regex(r) => {
91                let idx = self.regex_matchers.iter().position(|t| {
92                    let MatcherKind::Regex(r2) = &t.matcher.kind else {
93                        unreachable!()
94                    };
95                    r2.original() == r.original()
96                })?;
97                let removed = self.regex_matchers.remove(idx);
98                self.calculate_regex_set();
99                Some(removed)
100            }
101        }
102    }
103
104    /// Extends this context with another higher priority one. If there are any
105    /// conflicts, we drop the conflicting entries from the `self` context!
106    pub fn extend(&mut self, other: &Self) {
107        for (k, v) in &other.string_matchers {
108            self.string_matchers.insert(k.clone(), v.clone());
109        }
110        let must_recalculate = !other.regex_matchers.is_empty();
111        let mut regex_matchers = other.regex_matchers.clone();
112        regex_matchers.append(&mut self.regex_matchers);
113        self.regex_matchers = regex_matchers;
114        if must_recalculate {
115            self.calculate_regex_set();
116        }
117    }
118
119    /// Returns the formatter for the given string, defaulting to the fallback
120    /// if none match. See [`Self::match_str_opt`] for more details.
121    pub fn match_str<'a, 'b>(
122        &'b self,
123        haystack: &'a str,
124        children: NonMaxU32,
125    ) -> MatchResult<'a, 'b> {
126        self.match_str_opt(haystack, children)
127            .unwrap_or_else(|| MatchResult {
128                haystack,
129                captures: None,
130                formatter: self.fallback().formatter(),
131            })
132    }
133
134    /// Returns the formatter for the given string, if one exists. If multiple
135    /// matchers match the string, then the first one is returned. The order is
136    /// determined as `Matcher::Exact` first and then the first `Matcher::Regex`
137    /// in the order provided when constructing `self`.
138    pub fn match_str_opt<'a, 'b>(
139        &'b self,
140        haystack: &'a str,
141        children: NonMaxU32,
142    ) -> Option<MatchResult<'a, 'b>> {
143        // SAFETY: though the lifetime is 'static in terms of the
144        // compiler, the actual lifetime will end at the end of the
145        // block.
146        let static_key = unsafe { &*(haystack as *const _) };
147        let string_match = self
148            .string_matchers
149            .get(&(Cow::Borrowed(static_key), Some(children)));
150        let string_match =
151            string_match.or_else(|| self.string_matchers.get(&(Cow::Borrowed(static_key), None)));
152        if let Some(td) = string_match {
153            Some(td.as_match_no_capture(haystack))
154        } else {
155            let mut matches = self
156                .regex_set
157                .matches(haystack)
158                .into_iter()
159                .map(|idx| &self.regex_matchers[idx])
160                .filter(|td| td.matcher.children.is_none_or(|c| c == children));
161            // Fallback match in case of no matches which specify exact children
162            let first = matches.next();
163            let mut matches = first.iter().copied().chain(matches);
164            let specific = matches.find(|td| td.matcher.children.is_some());
165            let match_ = specific.or(first)?;
166            Some(match_.as_match_capture(haystack))
167        }
168    }
169
170    #[cfg(feature = "serde")]
171    pub(super) fn to_parts(&self) -> TermDisplayContextParts<'_> {
172        (&self.string_matchers, &self.regex_matchers, &self.fallback)
173    }
174    #[cfg(feature = "serde")]
175    pub(super) fn from_parts(
176        string_matchers: FxHashMap<(Cow<'static, str>, Option<NonMaxU32>), TermDisplay>,
177        regex_matchers: Vec<TermDisplay>,
178        fallback: FallbackFormatter,
179    ) -> Self {
180        let mut this = TermDisplayContext {
181            string_matchers,
182            regex_matchers,
183            ..Default::default()
184        };
185        this.calculate_regex_set();
186        this.fallback = fallback;
187        this
188    }
189
190    fn push_inner(&mut self, term: TermDisplay) -> Result<bool, TdcError> {
191        match &term.matcher.kind {
192            MatcherKind::Exact(s) => {
193                let k = (Cow::Owned(s.to_string()), term.matcher.children);
194                let duplicate = self.string_matchers.insert(k, term);
195                if let Some(duplicate) = duplicate {
196                    let MatcherKind::Exact(s) = duplicate.matcher.kind else {
197                        unreachable!()
198                    };
199                    Err(TdcError::DuplicateExactMatcher(
200                        s,
201                        duplicate.matcher.children,
202                    ))
203                } else {
204                    Ok(false)
205                }
206            }
207            MatcherKind::Regex(_) => {
208                self.regex_matchers.push(term);
209                Ok(true)
210            }
211        }
212    }
213    fn calculate_regex_set(&mut self) {
214        self.regex_set = regex::RegexSet::new(self.regex_matchers.iter().map(|t| {
215            let MatcherKind::Regex(r) = &t.matcher.kind else {
216                unreachable!()
217            };
218            r.original()
219        }))
220        .unwrap();
221    }
222}
223
224impl PartialEq for TermDisplayContext {
225    fn eq(&self, other: &Self) -> bool {
226        self.string_matchers == other.string_matchers
227            && self.regex_matchers == other.regex_matchers
228            && self.fallback == other.fallback
229    }
230}
231impl Eq for TermDisplayContext {}
232
/// Result of matching a string against a [`TermDisplayContext`].
pub struct MatchResult<'a, 'b> {
    /// The string that was matched.
    pub haystack: &'a str,
    /// Regex capture groups of the match. `None` for exact-string and
    /// fallback matches, and for regex matches whose formatter has no
    /// `max_capture`.
    pub captures: Option<regex::Captures<'a>>,
    /// The formatter selected for `haystack`.
    pub formatter: &'b Formatter,
}
238
/// A single display rule: a [`Matcher`] selecting the strings it applies to
/// and the [`Formatter`] to use for them.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TermDisplay {
    /// Decides which strings this rule applies to.
    pub(crate) matcher: Matcher,
    /// Formatter used for matching strings.
    pub(crate) formatter: Formatter,
}
245
246impl TermDisplay {
    /// Returns the "empty" rule: an exact matcher for the empty string with
    /// no `children` restriction, and a formatter with no outputs, no
    /// captures and the default bind power on both sides.
    pub const fn empty() -> Self {
        Self {
            matcher: Matcher {
                children: None,
                kind: MatcherKind::Exact(String::new()),
            },
            formatter: Formatter {
                bind_power: BindPowerPair::symmetric(DEFAULT_BIND_POWER),
                outputs: Vec::new(),
                max_capture: None,
            },
        }
    }
260
261    pub fn new(matcher: Matcher, formatter: Formatter) -> Result<Self, ConversionError> {
262        if let Some(max_capture) = formatter.max_capture {
263            let MatcherKind::Regex(r) = &matcher.kind else {
264                return Err(ConversionError::FormatterExpectsRegex(matcher, formatter));
265            };
266            if max_capture.get() as usize >= r.regex().captures_len() {
267                return Err(ConversionError::RegexNotEnoughCaptures(matcher, formatter));
268            }
269        }
270        Ok(Self { matcher, formatter })
271    }
272    pub fn deparse_string(&self) -> (String, String) {
273        (
274            self.matcher.deparse_string(),
275            self.formatter.deparse_string(),
276        )
277    }
278
279    pub fn is_empty(&self) -> bool {
280        self == &Self::empty()
281    }
282
283    /// Call this when you already know that `self.matcher` matches `haystack`.
284    pub const fn as_match_no_capture<'a>(&self, haystack: &'a str) -> MatchResult<'a, '_> {
285        MatchResult {
286            haystack,
287            captures: None,
288            formatter: &self.formatter,
289        }
290    }
291
292    /// Call this when you already know that `self.matcher` matches `haystack`.
293    pub fn as_match_capture<'a>(&self, haystack: &'a str) -> MatchResult<'a, '_> {
294        let MatcherKind::Regex(r) = &self.matcher.kind else {
295            unreachable!()
296        };
297        let Some(max_capture) = self.formatter.max_capture else {
298            return self.as_match_no_capture(haystack);
299        };
300        let captures = r.regex().captures(haystack).unwrap();
301        debug_assert!(captures.len() > max_capture.get() as usize);
302        MatchResult {
303            haystack,
304            captures: Some(captures),
305            formatter: &self.formatter,
306        }
307    }
308}
309
/// Selects the strings a [`TermDisplay`] rule applies to.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Matcher {
    /// When `Some`, the rule only applies if the `children` value passed to
    /// the lookup is equal to it; when `None`, any value is accepted.
    // NOTE(review): presumably the number of children of the term being
    // displayed — confirm against the callers of `match_str`.
    pub children: Option<NonMaxU32>,
    /// How the string itself is matched.
    pub kind: MatcherKind,
}
316
/// The two ways a [`Matcher`] can match a string.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MatcherKind {
    /// Matches only the exact string (looked up through a hash map).
    Exact(String),
    /// Matches any string accepted by the regular expression.
    Regex(RegexMatcher),
}
323
/// A compiled regular expression together with the pattern text it was
/// compiled from.
#[derive(Debug, Clone)]
pub struct RegexMatcher {
    /// The pattern text passed to `RegexMatcher::new`.
    original: String,
    /// The regex compiled from `original`.
    regex: regex::Regex,
}
329impl RegexMatcher {
330    pub fn new(original: String) -> Result<Self, regex::Error> {
331        let regex = regex::Regex::new(&original)?;
332        Ok(Self { original, regex })
333    }
334    pub fn original(&self) -> &String {
335        &self.original
336    }
337    pub fn regex(&self) -> &regex::Regex {
338        &self.regex
339    }
340}
341
342impl PartialEq for RegexMatcher {
343    fn eq(&self, other: &Self) -> bool {
344        self.original == other.original
345    }
346}
347impl Eq for RegexMatcher {}
348
/// Describes how a matched term is turned into output: a sequence of
/// [`SubFormatter`] pieces plus the binding power of the result.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Formatter {
    /// How strongly does the formatter bind its output on the left and on the
    /// right? Bracketed outputs generally have a higher binding power than
    /// non-bracketed ones. For example `{ ... }` can have a higher binding
    /// power, while `... + ...` would typically have a lower binding power.
    pub bind_power: BindPowerPair,

    /// The formatter's output, as a sequence of pieces.
    pub outputs: Vec<SubFormatter>,

    /// The maximum value of any stored `SubFormatter::Capture`, or `None` if
    /// there is none. Recomputed by [`Formatter::calculate_max_capture`]; as
    /// the fields are public it is the caller's job to keep it up to date
    /// after changing `outputs`.
    pub max_capture: Option<NonMaxU32>,
}
364
365impl Formatter {
366    pub fn calculate_max_capture(&mut self) {
367        self.max_capture = self
368            .outputs
369            .iter()
370            .flat_map(|o| match o {
371                SubFormatter::Capture(c) => Some(*c),
372                SubFormatter::Repeat(r) => (r.left_sep.max_capture.is_some()
373                    || r.middle_sep.max_capture.is_some()
374                    || r.right_sep.max_capture.is_some())
375                .then(|| {
376                    r.left_sep.max_capture.unwrap_or_default().max(
377                        r.middle_sep
378                            .max_capture
379                            .unwrap_or_default()
380                            .max(r.right_sep.max_capture.unwrap_or_default()),
381                    )
382                }),
383                _ => None,
384            })
385            .max();
386    }
387}
388
/// Wrapper around the [`Formatter`] that is used when no rule matches.
/// [`FallbackFormatter::new`] rejects formatters with a `max_capture` greater
/// than zero.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct FallbackFormatter(Formatter);
392
393impl FallbackFormatter {
394    /// Creates the fallback formatter. Returns `Ok` if successful, `Err`
395    /// if the fallback formatter has a non-zero `max_capture`.
396    pub fn new(formatter: Formatter) -> Result<Self, FallbackParseError> {
397        if let Some(mc) = formatter.max_capture.filter(|mc| mc.get() > 0) {
398            Err(FallbackParseError::MaxCaptureTooLarge(mc))
399        } else {
400            Ok(Self(formatter))
401        }
402    }
403    pub fn formatter(&self) -> &Formatter {
404        &self.0
405    }
406}
407
/// Binding power of one side of an output; compared numerically (see
/// [`BindPowerPair::is_smaller`]).
pub type BindPower = u32;
409
/// Binding power on the left and on the right side.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BindPowerPair {
    /// Binding power on the left side.
    pub left: BindPower,
    /// Binding power on the right side.
    pub right: BindPower,
}
416impl BindPowerPair {
417    pub const fn symmetric(power: BindPower) -> Self {
418        Self {
419            left: power,
420            right: power,
421        }
422    }
423    pub const fn asymmetric(left: BindPower, right: BindPower) -> Self {
424        Self { left, right }
425    }
426    pub const fn is_smaller(&self, other: &Self) -> bool {
427        self.left < other.left || self.right < other.right
428    }
429}
430
/// One piece of a [`Formatter`]'s output.
// NOTE(review): the code that renders these pieces is not in this file; the
// per-variant descriptions below are inferred from names and types — confirm
// against the renderer.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubFormatter {
    /// A fixed string (presumably emitted verbatim).
    String(String),
    /// A single child, addressed by `path` and `index`.
    Single {
        path: ChildPath,
        index: ChildIndex,
        /// How strongly does the surrounding context bind the child?
        bind_power: BindPowerPair,
    },
    /// A range of children with separators, see [`SubFormatterRepeat`].
    Repeat(SubFormatterRepeat),
    /// A regex capture group, by index. Counted towards
    /// `Formatter::max_capture`.
    Capture(NonMaxU32),
}
444
/// Payload of `SubFormatter::Repeat`.
// NOTE(review): field meanings below are inferred from names and types only;
// the rendering code is not in this file — confirm against it.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubFormatterRepeat {
    /// Path to the term whose children are repeated (presumably).
    pub path: ChildPath,
    /// First child index of the range (presumably).
    pub from: ChildIndex,
    /// Last child index of the range (presumably; inclusivity not visible
    /// here).
    pub to: ChildIndex,
    /// Separator formatter for the left end. Its `max_capture` is read by
    /// `Formatter::calculate_max_capture`.
    pub left_sep: Formatter,
    /// Separator formatter used between elements. Its `max_capture` is read
    /// by `Formatter::calculate_max_capture`.
    pub middle_sep: Formatter,
    /// Separator formatter for the right end. Its `max_capture` is read by
    /// `Formatter::calculate_max_capture`.
    pub right_sep: Formatter,
    /// Bind power associated with the left end (presumably).
    pub left: BindPower,
    /// Bind powers associated with the middle separator (presumably).
    pub middle: BindPowerPair,
    /// Bind power associated with the right end (presumably).
    pub right: BindPower,
}
458
/// A sequence of [`ChildIndex`] steps.
// NOTE(review): presumably each step selects a child, descending from the
// current term — confirm against the code that resolves paths.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChildPath(pub(super) Vec<ChildIndex>);
462
463impl ChildPath {
464    pub fn get(&self) -> &[ChildIndex] {
465        &self.0
466    }
467}
468
/// A signed child index, stored as a `NonMaxI32`.
// NOTE(review): the meaning of negative values (presumably counting from the
// end) is not visible in this file — confirm against the code using it.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChildIndex(pub(super) nonmax::NonMaxI32);
472
473impl ChildIndex {
474    pub const fn get(&self) -> i32 {
475        self.0.get()
476    }
477}