Skip to main content

rgx/explain/
formatter.rs

1use regex_syntax::ast::{
2    Assertion, AssertionKind, ClassBracketed, ClassPerl, ClassPerlKind, ClassSet, ClassSetItem,
3    ClassUnicode, ClassUnicodeKind, FlagsItem, FlagsItemKind, Group, GroupKind, Literal,
4    Repetition, RepetitionKind, RepetitionRange,
5};
6
7pub fn format_literal(lit: &Literal) -> String {
8    let c = lit.c;
9    if c.is_alphanumeric() {
10        format!("Literal character '{c}'")
11    } else {
12        format!("Literal '{c}' (U+{:04X})", c as u32)
13    }
14}
15
16pub fn format_assertion(assertion: &Assertion) -> String {
17    match assertion.kind {
18        AssertionKind::StartLine => "Start of line (^)".to_string(),
19        AssertionKind::EndLine => "End of line ($)".to_string(),
20        AssertionKind::StartText => "Start of text (\\A)".to_string(),
21        AssertionKind::EndText => "End of text (\\z)".to_string(),
22        AssertionKind::WordBoundary => "Word boundary (\\b)".to_string(),
23        AssertionKind::NotWordBoundary => "Not a word boundary (\\B)".to_string(),
24        _ => "Assertion".to_string(),
25    }
26}
27
28pub fn format_perl_class(class: &ClassPerl) -> String {
29    let negated = class.negated;
30    match class.kind {
31        ClassPerlKind::Digit => {
32            if negated {
33                "Non-digit character (\\D)".to_string()
34            } else {
35                "Digit character [0-9] (\\d)".to_string()
36            }
37        }
38        ClassPerlKind::Space => {
39            if negated {
40                "Non-whitespace character (\\S)".to_string()
41            } else {
42                "Whitespace character (\\s)".to_string()
43            }
44        }
45        ClassPerlKind::Word => {
46            if negated {
47                "Non-word character (\\W)".to_string()
48            } else {
49                "Word character [a-zA-Z0-9_] (\\w)".to_string()
50            }
51        }
52    }
53}
54
55pub fn format_unicode_class(class: &ClassUnicode) -> String {
56    let negated = if class.negated { "not " } else { "" };
57    match &class.kind {
58        ClassUnicodeKind::OneLetter(c) => {
59            format!("Unicode property {negated}'{c}'")
60        }
61        ClassUnicodeKind::Named(name) => {
62            format!("Unicode category {negated}'{name}'")
63        }
64        ClassUnicodeKind::NamedValue { name, value, .. } => {
65            format!("Unicode property {negated}{name}={value}")
66        }
67    }
68}
69
70pub fn format_bracketed_class(class: &ClassBracketed) -> String {
71    let negated = if class.negated { "not " } else { "" };
72    let items = describe_class_set(&class.kind);
73    format!("Character class: {negated}[{items}]")
74}
75
76fn describe_class_set(set: &ClassSet) -> String {
77    match set {
78        ClassSet::Item(item) => describe_class_set_item(item),
79        ClassSet::BinaryOp(op) => {
80            format!(
81                "{} {:?} {}",
82                describe_class_set(&op.lhs),
83                op.kind,
84                describe_class_set(&op.rhs)
85            )
86        }
87    }
88}
89
90fn describe_class_set_item(item: &ClassSetItem) -> String {
91    match item {
92        ClassSetItem::Empty(_) => String::new(),
93        ClassSetItem::Literal(lit) => format!("{}", lit.c),
94        ClassSetItem::Range(range) => {
95            format!("{}-{}", range.start.c, range.end.c)
96        }
97        ClassSetItem::Ascii(ascii) => {
98            let negated = if ascii.negated { "^" } else { "" };
99            format!("{negated}{:?}", ascii.kind)
100        }
101        ClassSetItem::Unicode(u) => format_unicode_class(u),
102        ClassSetItem::Perl(p) => format_perl_class(p),
103        ClassSetItem::Bracketed(b) => format_bracketed_class(b),
104        ClassSetItem::Union(union) => union
105            .items
106            .iter()
107            .map(describe_class_set_item)
108            .collect::<Vec<_>>()
109            .join(", "),
110    }
111}
112
113pub fn format_repetition(rep: &Repetition) -> String {
114    let greedy = if rep.greedy { "" } else { " (lazy)" };
115    match &rep.op.kind {
116        RepetitionKind::ZeroOrOne => format!("Optional (0 or 1 time){greedy}"),
117        RepetitionKind::ZeroOrMore => format!("Zero or more times{greedy}"),
118        RepetitionKind::OneOrMore => format!("One or more times{greedy}"),
119        RepetitionKind::Range(range) => match range {
120            RepetitionRange::Exactly(n) => format!("Exactly {n} times"),
121            RepetitionRange::AtLeast(n) => format!("At least {n} times{greedy}"),
122            RepetitionRange::Bounded(min, max) => {
123                format!("Between {min} and {max} times{greedy}")
124            }
125        },
126    }
127}
128
129pub fn format_group(group: &Group) -> String {
130    match &group.kind {
131        GroupKind::CaptureIndex(idx) => {
132            format!("Capture group #{}", idx)
133        }
134        GroupKind::CaptureName { name, .. } => {
135            format!("Named capture group '{}'", name.name)
136        }
137        GroupKind::NonCapturing(_) => "Non-capturing group".to_string(),
138    }
139}
140
141pub fn format_flags_item(flags: &regex_syntax::ast::Flags) -> String {
142    let items: Vec<String> = flags.items.iter().map(format_single_flag).collect();
143    format!("Set flags: {}", items.join(", "))
144}
145
146fn format_single_flag(item: &FlagsItem) -> String {
147    match &item.kind {
148        FlagsItemKind::Negation => "disable".to_string(),
149        FlagsItemKind::Flag(flag) => {
150            use regex_syntax::ast::Flag;
151            match flag {
152                Flag::CaseInsensitive => "case-insensitive (i)".to_string(),
153                Flag::MultiLine => "multi-line (m)".to_string(),
154                Flag::DotMatchesNewLine => "dot matches newline (s)".to_string(),
155                Flag::SwapGreed => "swap greedy (U)".to_string(),
156                Flag::Unicode => "unicode (u)".to_string(),
157                Flag::CRLF => "CRLF mode (R)".to_string(),
158                Flag::IgnoreWhitespace => "ignore whitespace (x)".to_string(),
159            }
160        }
161    }
162}