sewer_replacement/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::{borrow::Cow, fmt::Display, result, str::FromStr};
4
5use peg::str::LineCol;
6
7#[derive(thiserror::Error, Debug)]
8pub enum Error {
9	#[error("parse: {0}")]
10	Parse(#[from] peg::error::ParseError<LineCol>),
11	#[error("repetition not found: ${0}")]
12	RepetitionNotFoundId(usize),
13	#[error("repetition not found: ${{{0}}}")]
14	RepetitionNotFoundString(String),
15
16	#[error("two groups matched:\n\t{0}\n\t{1}")]
17	GroupConflict(Replacement, Replacement),
18	#[error("no group matched:\n\t{}", .0.iter().map(|g| g.to_string()).collect::<Vec<_>>().join("\n\t"))]
19	NoGroupMatched(Vec<Error>),
20}
21impl Error {
22	fn tolerate_group_fail(&self) -> bool {
23		matches!(
24			self,
25			Self::RepetitionNotFoundId(_)
26				| Self::RepetitionNotFoundString(_)
27				| Self::NoGroupMatched(_)
28		)
29	}
30}
31type Result<T, E = Error> = result::Result<T, E>;
32
/// A single element of a parsed [`Replacement`] template.
#[derive(Clone, Debug)]
pub enum Part {
	/// A literal byte; displayed as a `\xNN` escape.
	Byte(u8),
	/// A capture referenced by numeric index; displayed as `$N`.
	RepId(usize),
	/// A capture referenced by name; displayed as `${name}`.
	RepName(String),
	/// A list of alternative sub-templates; displayed as `(a|b|...)`.
	Group(Vec<Replacement>),
}
impl Display for Part {
	/// Writes the textual form of this part.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self {
			Self::Byte(byte) => write!(f, "\\x{byte:02x?}"),
			Self::RepId(index) => write!(f, "${index}"),
			Self::RepName(label) => write!(f, "${{{label}}}"),
			Self::Group(alternatives) => {
				f.write_str("(")?;
				// The separator is empty before the first alternative and `|` afterwards.
				let mut separator = "";
				for alternative in alternatives {
					write!(f, "{separator}{alternative}")?;
					separator = "|";
				}
				f.write_str(")")
			}
		}
	}
}

/// A parsed replacement template: an ordered sequence of [`Part`]s.
#[derive(Clone, Debug)]
pub struct Replacement(Vec<Part>);
impl Display for Replacement {
	/// Writes every part back to back, with nothing in between.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		self.0.iter().try_for_each(|part| write!(f, "{part}"))
	}
}
70
/// Source of captured byte strings that a replacement template refers to.
///
/// Both lookups return `None` when the requested capture is absent.
pub trait Capture {
	/// Looks up a capture by numeric index.
	fn get(&self, idx: usize) -> Option<Cow<'_, [u8]>>;
	/// Looks up a capture by name.
	fn name(&self, name: &str) -> Option<Cow<'_, [u8]>>;
}
75
76impl Replacement {
77	pub fn build(&self, cap: &impl Capture) -> Result<Vec<u8>> {
78		let mut out = Vec::new();
79		for part in &self.0 {
80			match part {
81				Part::Byte(b) => out.push(*b),
82				Part::RepId(i) => {
83					let mat = cap.get(*i).ok_or_else(|| Error::RepetitionNotFoundId(*i))?;
84					out.extend(mat.as_ref());
85				}
86				Part::RepName(name) => {
87					let mat = cap
88						.name(name)
89						.ok_or_else(|| Error::RepetitionNotFoundString(name.clone()))?;
90					out.extend(mat.as_ref());
91				}
92				Part::Group(g) => {
93					let mut errors = Vec::new();
94					let mut matched = None;
95					for group in g {
96						match group.build(cap) {
97							Ok(v) => {
98								if let Some(old) = matched.replace((v, group.clone())) {
99									return Err(Error::GroupConflict(old.1.clone(), group.clone()));
100								}
101							}
102							Err(e) if e.tolerate_group_fail() => errors.push(e),
103							Err(e) => return Err(e),
104						};
105					}
106					if let Some((matched, _)) = matched {
107						out.extend(&matched);
108					} else {
109						return Err(Error::NoGroupMatched(errors));
110					}
111				}
112			}
113		}
114		Ok(out)
115	}
116}
117
118impl FromStr for Replacement {
119	type Err = Error;
120
121	fn from_str(s: &str) -> Result<Self, Self::Err> {
122		Ok(replacement::replacement_root(s)?)
123	}
124}
125
126peg::parser! {
127pub grammar replacement() for str {
128	rule hex_char() -> u8
129	= v:['0'..='9'] {v as u8 - '0' as u8}
130	/ v:['a'..='f'] {v as u8 - 'a' as u8 + 10}
131	/ v:['A'..='F'] {v as u8 - 'A' as u8 + 10}
132	pub rule replacement_root() -> Replacement
133	= wse:quiet!{"(?x)"?} ws(wse.is_some()) v:replacement(wse.is_some()) ws(wse.is_some()) {v}
134	rule replacement(wse:bool) -> Replacement
135	=  v:replacement_part(wse)++ws(wse) {Replacement(v)}
136	rule replacement_part(wse:bool) -> Part
137	= quiet!{"\\\\" {Part::Byte(b'\\')}
138	/ "\\ " {Part::Byte(b' ')}
139	/ "\\x" a:hex_char() b:hex_char() {Part::Byte((a << 4) | b)}
140	/ "\\" {? Err("<special character>")}} / expected!("<special character>")
141
142	/ quiet!{"$$" {Part::Byte(b'$')}
143	/ "$" v:$(['0'..='9']+) {? Ok(Part::RepId(usize::from_str(v).map_err(|_| "bad id")?))}
144	/ "$<" v:$((!['}'][_])+) ">" {Part::RepName(v.to_owned())}
145	/ "$" {? Err("<selector>")}} / expected!("<selector>")
146
147	/ quiet!{"((" {Part::Byte(b'(')}
148	/ "))" {Part::Byte(b')')}
149	/ "(" ws(wse) groups:replacement(wse)**(ws(wse) "|" ws(wse)) ws(wse) ")" {Part::Group(groups)}
150	/ "||" {Part::Byte(b'|')}
151	/ "|" {? Err("<group>")}} / expected!("<group>")
152
153	/ !['\\' | '$' | '(' | ')' | '|' | '#'] c:['\0'..='\x7f'] {Part::Byte(c as u8)}
154
155	rule ws(wse: bool)
156	= ws_(wse)*
157	rule ws_(wse: bool)
158	= "#" (!['\n'] [_])* ("\n" / ![_]) {? if wse {Ok(())} else {Err("<unexpected whitespace>")}}
159	/ c:$(['\n' | ' ' | '\t']) {? if wse || c.is_empty() {Ok(())} else {Err("<unexpected whitespace>")}}
160}
161}