1#![doc = include_str!("../README.md")]
2
3use std::{borrow::Cow, fmt::Display, result, str::FromStr};
4
5use peg::str::LineCol;
6
7#[derive(thiserror::Error, Debug)]
8pub enum Error {
9 #[error("parse: {0}")]
10 Parse(#[from] peg::error::ParseError<LineCol>),
11 #[error("repetition not found: ${0}")]
12 RepetitionNotFoundId(usize),
13 #[error("repetition not found: ${{{0}}}")]
14 RepetitionNotFoundString(String),
15
16 #[error("two groups matched:\n\t{0}\n\t{1}")]
17 GroupConflict(Replacement, Replacement),
18 #[error("no group matched:\n\t{}", .0.iter().map(|g| g.to_string()).collect::<Vec<_>>().join("\n\t"))]
19 NoGroupMatched(Vec<Error>),
20}
21impl Error {
22 fn tolerate_group_fail(&self) -> bool {
23 matches!(
24 self,
25 Self::RepetitionNotFoundId(_)
26 | Self::RepetitionNotFoundString(_)
27 | Self::NoGroupMatched(_)
28 )
29 }
30}
/// File-local shorthand for `std::result::Result` whose error type defaults
/// to [`Error`].
type Result<T, E = Error> = result::Result<T, E>;
32
33#[derive(Debug, Clone)]
34pub enum Part {
35 Byte(u8),
36 RepId(usize),
37 RepName(String),
38 Group(Vec<Replacement>),
39}
40impl Display for Part {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 match self {
43 Part::Byte(v) => write!(f, "\\x{:02x?}", *v),
44 Part::RepId(id) => write!(f, "${id}"),
45 Part::RepName(name) => write!(f, "${{{name}}}"),
46 Part::Group(g) => {
47 write!(f, "(")?;
48 for (i, g) in g.iter().enumerate() {
49 if i != 0 {
50 write!(f, "|")?;
51 }
52 write!(f, "{g}")?;
53 }
54 write!(f, ")")
55 }
56 }
57 }
58}
59
60#[derive(Debug, Clone)]
61pub struct Replacement(Vec<Part>);
62impl Display for Replacement {
63 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64 for p in &self.0 {
65 write!(f, "{p}")?;
66 }
67 Ok(())
68 }
69}
70
/// Source of captured byte strings that a [`Replacement`] is rendered against.
pub trait Capture {
    /// Returns the bytes captured at index `idx`, or `None` when that capture
    /// is not available.
    fn get(&self, idx: usize) -> Option<Cow<'_, [u8]>>;

    /// Returns the bytes captured under `name`, or `None` when that capture
    /// is not available.
    fn name(&self, name: &str) -> Option<Cow<'_, [u8]>>;
}
75
76impl Replacement {
77 pub fn build(&self, cap: &impl Capture) -> Result<Vec<u8>> {
78 let mut out = Vec::new();
79 for part in &self.0 {
80 match part {
81 Part::Byte(b) => out.push(*b),
82 Part::RepId(i) => {
83 let mat = cap.get(*i).ok_or_else(|| Error::RepetitionNotFoundId(*i))?;
84 out.extend(mat.as_ref());
85 }
86 Part::RepName(name) => {
87 let mat = cap
88 .name(name)
89 .ok_or_else(|| Error::RepetitionNotFoundString(name.clone()))?;
90 out.extend(mat.as_ref());
91 }
92 Part::Group(g) => {
93 let mut errors = Vec::new();
94 let mut matched = None;
95 for group in g {
96 match group.build(cap) {
97 Ok(v) => {
98 if let Some(old) = matched.replace((v, group.clone())) {
99 return Err(Error::GroupConflict(old.1.clone(), group.clone()));
100 }
101 }
102 Err(e) if e.tolerate_group_fail() => errors.push(e),
103 Err(e) => return Err(e),
104 };
105 }
106 if let Some((matched, _)) = matched {
107 out.extend(&matched);
108 } else {
109 return Err(Error::NoGroupMatched(errors));
110 }
111 }
112 }
113 }
114 Ok(out)
115 }
116}
117
118impl FromStr for Replacement {
119 type Err = Error;
120
121 fn from_str(s: &str) -> Result<Self, Self::Err> {
122 Ok(replacement::replacement_root(s)?)
123 }
124}
125
126peg::parser! {
127pub grammar replacement() for str {
128 rule hex_char() -> u8
129 = v:['0'..='9'] {v as u8 - '0' as u8}
130 / v:['a'..='f'] {v as u8 - 'a' as u8 + 10}
131 / v:['A'..='F'] {v as u8 - 'A' as u8 + 10}
132 pub rule replacement_root() -> Replacement
133 = wse:quiet!{"(?x)"?} ws(wse.is_some()) v:replacement(wse.is_some()) ws(wse.is_some()) {v}
134 rule replacement(wse:bool) -> Replacement
135 = v:replacement_part(wse)++ws(wse) {Replacement(v)}
136 rule replacement_part(wse:bool) -> Part
137 = quiet!{"\\\\" {Part::Byte(b'\\')}
138 / "\\ " {Part::Byte(b' ')}
139 / "\\x" a:hex_char() b:hex_char() {Part::Byte((a << 4) | b)}
140 / "\\" {? Err("<special character>")}} / expected!("<special character>")
141
142 / quiet!{"$$" {Part::Byte(b'$')}
143 / "$" v:$(['0'..='9']+) {? Ok(Part::RepId(usize::from_str(v).map_err(|_| "bad id")?))}
144 / "$<" v:$((!['}'][_])+) ">" {Part::RepName(v.to_owned())}
145 / "$" {? Err("<selector>")}} / expected!("<selector>")
146
147 / quiet!{"((" {Part::Byte(b'(')}
148 / "))" {Part::Byte(b')')}
149 / "(" ws(wse) groups:replacement(wse)**(ws(wse) "|" ws(wse)) ws(wse) ")" {Part::Group(groups)}
150 / "||" {Part::Byte(b'|')}
151 / "|" {? Err("<group>")}} / expected!("<group>")
152
153 / !['\\' | '$' | '(' | ')' | '|' | '#'] c:['\0'..='\x7f'] {Part::Byte(c as u8)}
154
155 rule ws(wse: bool)
156 = ws_(wse)*
157 rule ws_(wse: bool)
158 = "#" (!['\n'] [_])* ("\n" / ![_]) {? if wse {Ok(())} else {Err("<unexpected whitespace>")}}
159 / c:$(['\n' | ' ' | '\t']) {? if wse || c.is_empty() {Ok(())} else {Err("<unexpected whitespace>")}}
160}
161}