use std::{ops, str};
pub mod errors;
use errors::*;
mod luapat;
use luapat::*;
/// A Lua-style string pattern together with the capture spans of its most
/// recent match attempt.
///
/// The pattern bytes are borrowed for `'a`; the matcher writes into `matches`
/// in place, which is why the matching methods take `&mut self`.
pub struct LuaPattern<'a> {
/// The pattern bytes supplied at construction.
patt: &'a [u8],
/// Start/end spans filled in by `str_match`; entry 0 is read as the whole match.
matches: [LuaMatch; LUA_MAXCAPTURES],
/// Value returned by the last `str_match` call; 0 means "no match".
n_match: usize,
}
impl<'a> LuaPattern<'a> {
    /// Creates a pattern from raw bytes without panicking on a bad pattern.
    ///
    /// # Errors
    /// Returns whatever `PatternError` `str_check` reports for `bytes`.
    pub fn from_bytes_try(bytes: &'a [u8]) -> Result<LuaPattern<'a>, PatternError> {
        str_check(bytes)?;
        Ok(LuaPattern {
            patt: bytes,
            matches: [LuaMatch { start: 0, end: 0 }; LUA_MAXCAPTURES],
            n_match: 0,
        })
    }

    /// Creates a pattern from a string slice without panicking on a bad pattern.
    ///
    /// # Errors
    /// Same as [`LuaPattern::from_bytes_try`].
    pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>, PatternError> {
        LuaPattern::from_bytes_try(patt.as_bytes())
    }

    /// Creates a pattern from a string slice.
    ///
    /// # Panics
    /// Panics with "bad pattern" if the pattern is rejected.
    pub fn new(patt: &'a str) -> LuaPattern<'a> {
        LuaPattern::new_try(patt).expect("bad pattern")
    }

    /// Creates a pattern from raw bytes.
    ///
    /// # Panics
    /// Panics with "bad pattern" if the pattern is rejected.
    pub fn from_bytes(bytes: &'a [u8]) -> LuaPattern<'a> {
        LuaPattern::from_bytes_try(bytes).expect("bad pattern")
    }

    /// Runs the pattern against `s`, recording the capture spans, and reports
    /// whether anything matched.
    ///
    /// # Panics
    /// Panics if `str_match` returns an error; the pattern was already checked
    /// at construction, so that is treated as a bug.
    pub fn matches_bytes(&mut self, s: &[u8]) -> bool {
        self.n_match =
            str_match(s, self.patt, &mut self.matches).expect("Should not fail - report as bug");
        self.n_match > 0
    }

    /// String version of [`LuaPattern::matches_bytes`].
    pub fn matches(&mut self, text: &str) -> bool {
        self.matches_bytes(text.as_bytes())
    }

    /// Matches `text` and returns the slice selected by
    /// [`LuaPattern::first_capture`], or `None` when there is no match.
    pub fn match_maybe<'t>(&mut self, text: &'t str) -> Option<&'t str> {
        if self.matches(text) {
            Some(&text[self.first_capture()])
        } else {
            None
        }
    }

    /// Matches `text` and returns captures 1 and 2.
    ///
    /// Returns `None` if there is no match or if the match count is not
    /// exactly 3 (whole match plus two captures).
    pub fn match_maybe_2<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str)> {
        if !self.matches(text) {
            return None;
        }
        let cc = self.match_captures(text);
        if cc.num_matches() != 3 {
            return None;
        }
        Some((cc.get(1), cc.get(2)))
    }

    /// Matches `text` and returns captures 1 to 3.
    ///
    /// Returns `None` if there is no match or if the match count is not
    /// exactly 4 (whole match plus three captures).
    pub fn match_maybe_3<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str, &'t str)> {
        if !self.matches(text) {
            return None;
        }
        let cc = self.match_captures(text);
        if cc.num_matches() != 4 {
            return None;
        }
        Some((cc.get(1), cc.get(2), cc.get(3)))
    }

    /// Matches `text` and returns captures 1 to 4.
    ///
    /// Returns `None` if there is no match or if the match count is not
    /// exactly 5 (whole match plus four captures).
    pub fn match_maybe_4<'t>(
        &mut self,
        text: &'t str,
    ) -> Option<(&'t str, &'t str, &'t str, &'t str)> {
        if !self.matches(text) {
            return None;
        }
        let cc = self.match_captures(text);
        if cc.num_matches() != 5 {
            return None;
        }
        Some((cc.get(1), cc.get(2), cc.get(3), cc.get(4)))
    }

    /// Matches `text` and returns every recorded span as a slice of `text`
    /// (index 0 first). The vector is empty when there is no match.
    pub fn captures<'b>(&mut self, text: &'b str) -> Vec<&'b str> {
        let mut res = Vec::new();
        self.capture_into(text, &mut res);
        res
    }

    /// Wraps the spans of the *previous* match in a [`Captures`] view over
    /// `text`. No matching is performed here; `text` should be the string the
    /// last match was run against.
    pub fn match_captures<'b, 'c>(&'c self, text: &'b str) -> Captures<'a, 'b, 'c> {
        Captures::new(self, text)
    }

    /// Matches `text`, replaces the contents of `vec` with the captured slices
    /// and returns whether there was a match. `vec` is cleared even on failure.
    pub fn capture_into<'b>(&mut self, text: &'b str, vec: &mut Vec<&'b str>) -> bool {
        self.matches(text);
        vec.clear();
        for i in 0..self.n_match {
            vec.push(&text[self.capture(i)]);
        }
        self.n_match > 0
    }

    /// Byte range of the whole match (span 0) from the last match attempt.
    pub fn range(&self) -> ops::Range<usize> {
        self.capture(0)
    }

    /// Byte range stored in span `i`.
    ///
    /// The index is not compared against the number of matches, so asking for
    /// a span the last match did not fill returns whatever is stored there.
    ///
    /// # Panics
    /// Panics if `i` is outside the fixed-size span array.
    pub fn capture(&self, i: usize) -> ops::Range<usize> {
        let span = &self.matches[i];
        ops::Range {
            start: span.start as usize,
            end: span.end as usize,
        }
    }

    /// Range of capture 1 when more than one span was recorded, otherwise the
    /// range of the whole match.
    pub fn first_capture(&self) -> ops::Range<usize> {
        let idx = if self.n_match > 1 { 1 } else { 0 };
        self.capture(idx)
    }

    /// Iterator over successive matches in `text`; see [`GMatch`].
    pub fn gmatch<'b, 'c>(&'c mut self, text: &'b str) -> GMatch<'a, 'b, 'c> {
        GMatch { m: self, text }
    }

    /// Iterator over successive matches in `text` that yields a captures
    /// accessor per match; see [`GMatchCaptures`].
    pub fn gmatch_captures<'b, 'c>(&'c mut self, text: &'b str) -> GMatchCaptures<'a, 'b, 'c> {
        GMatchCaptures { m: self, text }
    }

    /// Iterator over successive matches in `bytes`; see [`GMatchBytes`].
    pub fn gmatch_bytes<'b>(&'a mut self, bytes: &'b [u8]) -> GMatchBytes<'a, 'b> {
        GMatchBytes { m: self, bytes }
    }

    /// Replaces every match in `text` with the string produced by `lookup`,
    /// which receives the captures of that match.
    ///
    /// Matching restarts on the remainder of the text after each match. When a
    /// match is empty, the replacement is emitted and one character is copied
    /// through unchanged so that the scan always advances (previously an empty
    /// match looped forever).
    pub fn gsub_with<F>(&mut self, text: &str, lookup: F) -> String
    where
        F: Fn(Captures) -> String,
    {
        let mut slice = text;
        let mut res = String::new();
        while self.matches(slice) {
            let all = self.range();
            res.push_str(&slice[..all.start]);
            let repl = lookup(Captures::new(self, slice));
            res.push_str(&repl);
            let rest = &slice[all.end..];
            if all.start < all.end {
                slice = rest;
                continue;
            }
            // Empty match: step over one character, or stop at end of input.
            match rest.chars().next() {
                Some(ch) => {
                    res.push(ch);
                    slice = &rest[ch.len_utf8()..];
                }
                None => {
                    slice = rest;
                    break;
                }
            }
        }
        res.push_str(slice);
        res
    }

    /// Replaces every match in `text` using the replacement template `repl`
    /// (`%N` inserts capture N, `%%` inserts a literal `%`).
    ///
    /// # Panics
    /// Panics if the substituted bytes are not valid UTF-8; use
    /// [`LuaPattern::gsub_checked`] to get an error instead.
    pub fn gsub(&mut self, text: &str, repl: &str) -> String {
        String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes())).unwrap()
    }

    /// Like [`LuaPattern::gsub`], but reports invalid UTF-8 in the result.
    ///
    /// # Errors
    /// Returns `PatternError::Utf8` if the substituted bytes are not UTF-8.
    pub fn gsub_checked(&mut self, text: &str, repl: &str) -> Result<String, PatternError> {
        String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes()))
            .map_err(PatternError::Utf8)
    }

    /// Byte-level substitution using the replacement template `repl`, parsed
    /// by [`ByteSubst::gsub_patterns`].
    ///
    /// Capture indices in `repl` are not validated against the number of
    /// captures the pattern produces. An empty match emits the replacement and
    /// then copies one byte through so the scan always advances (previously an
    /// empty match looped forever).
    pub fn gsub_bytes(&mut self, text: &[u8], repl: &[u8]) -> Vec<u8> {
        let repl = ByteSubst::gsub_patterns(repl);
        let mut slice = text;
        let mut res = Vec::new();
        while self.matches_bytes(slice) {
            let all = self.range();
            res.extend_from_slice(&slice[..all.start]);
            let captures = ByteCaptures {
                m: self,
                bytes: slice,
            };
            for r in &repl {
                match r {
                    ByteSubst::Bytes(s) => res.extend_from_slice(s),
                    ByteSubst::Capture(i) => res.extend_from_slice(captures.get(*i)),
                }
            }
            let rest = &slice[all.end..];
            if all.start < all.end {
                slice = rest;
                continue;
            }
            // Empty match: step over one byte, or stop at end of input.
            if rest.is_empty() {
                slice = rest;
                break;
            }
            res.push(rest[0]);
            slice = &rest[1..];
        }
        res.extend_from_slice(slice);
        res
    }

    /// Byte-level substitution where `lookup` produces the replacement bytes
    /// for each match from its captures.
    ///
    /// An empty match emits the replacement and then copies one byte through
    /// so the scan always advances (previously an empty match looped forever).
    pub fn gsub_bytes_with<F>(&mut self, bytes: &[u8], lookup: F) -> Vec<u8>
    where
        F: Fn(ByteCaptures) -> Vec<u8>,
    {
        let mut slice = bytes;
        let mut res = Vec::new();
        while self.matches_bytes(slice) {
            let all = self.range();
            res.extend_from_slice(&slice[..all.start]);
            let captures = ByteCaptures {
                m: self,
                bytes: slice,
            };
            let repl = lookup(captures);
            res.extend(repl);
            let rest = &slice[all.end..];
            if all.start < all.end {
                slice = rest;
                continue;
            }
            // Empty match: step over one byte, or stop at end of input.
            if rest.is_empty() {
                slice = rest;
                break;
            }
            res.push(rest[0]);
            slice = &rest[1..];
        }
        res.extend_from_slice(slice);
        res
    }
}
/// One segment of a parsed `gsub` replacement template.
#[derive(Debug)]
pub enum ByteSubst {
/// Literal bytes copied to the output as-is.
Bytes(Vec<u8>),
/// Index of the capture whose bytes are inserted at this point.
Capture(usize),
}
impl ByteSubst {
fn new_bytes(bytes: &[u8]) -> Self {
Self::Bytes(bytes.to_vec())
}
pub fn gsub_patterns(repl: &[u8]) -> Vec<Self> {
let mut m = LuaPattern::new("%%([%%%d])");
let mut res = Vec::new();
let mut slice = repl;
while m.matches_bytes(slice) {
let all = m.range();
let before = &slice[0..all.start];
if before != b"" {
res.push(Self::new_bytes(before));
}
let capture = &slice[m.capture(1)];
if capture == b"%" {
res.push(Self::new_bytes(b"%"));
} else {
let index: usize = str::from_utf8(capture).unwrap().parse().unwrap();
res.push(Self::Capture(index));
}
slice = &slice[all.end..];
}
res.push(Self::new_bytes(slice));
res
}
}
/// Read-only view of the spans recorded by a pattern's last match, resolved
/// against the text they index into. Also usable as an iterator over those
/// spans, starting at index 0.
pub struct Captures<'a, 'b, 'c>
where
'a: 'c,
{
/// Pattern whose recorded spans are read.
m: &'c LuaPattern<'a>,
/// Text the spans are sliced from.
text: &'b str,
/// Index of the next span the iterator will yield.
i_match: usize,
}
impl<'a, 'b, 'c> Captures<'a, 'b, 'c> {
fn new(m: &'c LuaPattern<'a>, text: &'b str) -> Self {
Self {
i_match: 0,
m,
text,
}
}
pub fn get(&self, i: usize) -> &'b str {
&self.text[self.m.capture(i)]
}
pub fn num_matches(&self) -> usize {
self.m.n_match
}
}
impl<'a, 'b, 'c> Iterator for Captures<'a, 'b, 'c> {
    type Item = &'b str;

    /// Yields the recorded spans in index order, stopping once the cursor
    /// equals the number of matches.
    fn next(&mut self) -> Option<Self::Item> {
        if self.i_match != self.num_matches() {
            let item = self.get(self.i_match);
            self.i_match += 1;
            return Some(item);
        }
        None
    }
}
/// Read-only view of the spans recorded by a pattern's last match, resolved
/// against the byte slice they index into.
pub struct ByteCaptures<'a, 'b> {
/// Pattern whose recorded spans are read.
m: &'a LuaPattern<'a>,
/// Bytes the spans are sliced from.
bytes: &'b [u8],
}
impl<'a, 'b> ByteCaptures<'a, 'b> {
    /// Returns the bytes covered by span `i`.
    ///
    /// The index is passed straight to `LuaPattern::capture`; it is not
    /// compared against `num_matches` here.
    pub fn get(&self, i: usize) -> &'b [u8] {
        let span = self.m.capture(i);
        &self.bytes[span]
    }

    /// Number of spans recorded by the pattern's last match attempt.
    pub fn num_matches(&self) -> usize {
        self.m.n_match
    }
}
/// Iterator over successive matches of a pattern in a string.
pub struct GMatch<'a, 'b, 'c>
where
'a: 'c,
{
/// Pattern being matched; borrowed mutably because matching records spans.
m: &'c mut LuaPattern<'a>,
/// Portion of the input that has not been searched yet.
text: &'b str,
}
impl<'a, 'b, 'c> Iterator for GMatch<'a, 'b, 'c> {
    type Item = &'b str;

    /// Yields the first capture (or the whole match when the pattern has no
    /// captures) of the next match in the remaining text.
    ///
    /// After a non-empty match the search resumes right after it. After an
    /// empty match one character is skipped so the iterator always makes
    /// progress; an empty match on already exhausted text ends iteration.
    /// Previously an empty match left the text untouched and the iterator
    /// never terminated.
    fn next(&mut self) -> Option<Self::Item> {
        if !self.m.matches(self.text) {
            return None;
        }
        let whole = self.m.range();
        let matched_empty = whole.start == whole.end;
        if matched_empty && self.text.is_empty() {
            // Nothing left to step over: stop instead of yielding "" forever.
            return None;
        }
        let item = &self.text[self.m.first_capture()];
        let rest = &self.text[whole.end..];
        self.text = if matched_empty {
            match rest.chars().next() {
                Some(ch) => &rest[ch.len_utf8()..],
                None => rest,
            }
        } else {
            rest
        };
        Some(item)
    }
}
/// Captures accessor handed out by `GMatchCaptures`.
///
/// NOTE(review): `matches` is a raw pointer and no lifetime ties this value to
/// the pattern it was taken from, so the compiler does not stop it from
/// outliving that pattern or from observing a later match's spans.
pub struct CapturesUnsafe<'b> {
/// Pointer to the first element of the pattern's span array.
matches: *const LuaMatch,
/// Text the spans are sliced from.
text: &'b str,
}
impl<'b> CapturesUnsafe<'b> {
pub fn get(&self, i: usize) -> &'b str {
unsafe {
let p = self.matches.offset(i as isize);
let range = ops::Range {
start: (*p).start as usize,
end: (*p).end as usize,
};
&self.text[range]
}
}
}
/// Iterator over successive matches of a pattern in a string that yields a
/// captures accessor for each match.
pub struct GMatchCaptures<'a, 'b, 'c>
where
'a: 'c,
{
/// Pattern being matched; borrowed mutably because matching records spans.
m: &'c mut LuaPattern<'a>,
/// Portion of the input that has not been searched yet.
text: &'b str,
}
impl<'a, 'b, 'c> Iterator for GMatchCaptures<'a, 'b, 'c>
where
    'a: 'c,
{
    type Item = CapturesUnsafe<'b>;

    /// Yields a captures accessor for the next match in the remaining text.
    ///
    /// The accessor's text is the remaining input up to the end of the match,
    /// which is what the recorded spans index into. It reads the pattern's
    /// span array through a raw pointer, so its contents change on the next
    /// call to `next`.
    ///
    /// After an empty match one character is skipped so the iterator always
    /// makes progress; an empty match on already exhausted text ends
    /// iteration. Previously an empty match at the start of the remaining
    /// text left it untouched and the iterator never terminated.
    fn next(&mut self) -> Option<Self::Item> {
        if !self.m.matches(self.text) {
            return None;
        }
        let whole = self.m.range();
        let matched_empty = whole.start == whole.end;
        if matched_empty && self.text.is_empty() {
            // Nothing left to step over: stop instead of looping forever.
            return None;
        }
        let (head, tail) = self.text.split_at(whole.end);
        self.text = if matched_empty {
            match tail.chars().next() {
                Some(ch) => &tail[ch.len_utf8()..],
                None => tail,
            }
        } else {
            tail
        };
        let match_ptr: *const LuaMatch = self.m.matches.as_ptr();
        Some(CapturesUnsafe {
            matches: match_ptr,
            text: head,
        })
    }
}
/// Iterator over successive matches of a pattern in a byte slice.
pub struct GMatchBytes<'a, 'b> {
/// Pattern being matched; borrowed mutably because matching records spans.
m: &'a mut LuaPattern<'a>,
/// Portion of the input that has not been searched yet.
bytes: &'b [u8],
}
impl<'a, 'b> Iterator for GMatchBytes<'a, 'b> {
    type Item = &'b [u8];

    /// Yields the first capture (or the whole match when the pattern has no
    /// captures) of the next match in the remaining bytes.
    ///
    /// After a non-empty match the search resumes right after it. After an
    /// empty match one byte is skipped so the iterator always makes progress;
    /// an empty match on already exhausted input ends iteration. Previously an
    /// empty match left the input untouched and the iterator never terminated.
    fn next(&mut self) -> Option<Self::Item> {
        if !self.m.matches_bytes(self.bytes) {
            return None;
        }
        let whole = self.m.range();
        let matched_empty = whole.start == whole.end;
        if matched_empty && self.bytes.is_empty() {
            // Nothing left to step over: stop instead of yielding b"" forever.
            return None;
        }
        let item = &self.bytes[self.m.first_capture()];
        let rest = &self.bytes[whole.end..];
        self.bytes = if matched_empty && !rest.is_empty() {
            &rest[1..]
        } else {
            rest
        };
        Some(item)
    }
}
/// Incrementally assembles the bytes of a pattern from text and raw byte
/// fragments.
pub struct LuaPatternBuilder {
/// Pattern bytes accumulated so far.
bytes: Vec<u8>,
}
impl LuaPatternBuilder {
    /// Creates a builder with no pattern bytes.
    pub fn new() -> LuaPatternBuilder {
        Self { bytes: Vec::new() }
    }

    /// Appends `s` verbatim; any pattern syntax in it is kept as-is.
    pub fn text(&mut self, s: &str) -> &mut Self {
        let raw = s.as_bytes();
        self.bytes.extend_from_slice(raw);
        self
    }

    /// Appends the first whitespace-delimited word of every line in `lines`,
    /// concatenated without separators. Lines with no word contribute nothing.
    pub fn text_lines(&mut self, lines: &str) -> &mut Self {
        let joined: String = lines
            .lines()
            .filter_map(|line| line.split_whitespace().next())
            .collect();
        self.text(&joined)
    }

    /// Appends `b`, prefixing each byte that matches the escape set below with
    /// `%` so it is taken literally.
    pub fn bytes(&mut self, b: &[u8]) -> &mut Self {
        let mut magic = LuaPattern::new("[%-%.%+%[%]%(%)%$%^%%%?%*]");
        let escaped = magic.gsub_bytes_with(b, |cc| vec![b'%', cc.get(0)[0]]);
        self.bytes.extend(escaped);
        self
    }

    /// Decodes `bs` with [`LuaPatternBuilder::hex_to_bytes`] and appends the
    /// result through [`LuaPatternBuilder::bytes`].
    pub fn bytes_as_hex(&mut self, bs: &str) -> &mut Self {
        let decoded = LuaPatternBuilder::hex_to_bytes(bs);
        self.bytes(&decoded)
    }

    /// Returns the accumulated pattern bytes and leaves the builder empty.
    pub fn build(&mut self) -> Vec<u8> {
        std::mem::take(&mut self.bytes)
    }

    /// Converts each pair of adjacent hex digits found in `s` to one byte.
    /// Anything that is not part of such a pair is skipped.
    pub fn hex_to_bytes(s: &str) -> Vec<u8> {
        let mut hex_pair = LuaPattern::new("%x%x");
        let mut out = Vec::new();
        for pair in hex_pair.gmatch(s) {
            out.push(u8::from_str_radix(pair, 16).unwrap());
        }
        out
    }

    /// Formats every byte of `s` as two upper-case hex digits.
    pub fn bytes_to_hex(s: &[u8]) -> String {
        let mut hex = String::with_capacity(s.len() * 2);
        for byte in s {
            hex.push_str(&format!("{:02X}", byte));
        }
        hex
    }
}
// Unit tests covering matching, capture access, gmatch iteration, gsub
// substitution and rejection of malformed patterns.
#[cfg(test)]
mod tests {
use super::*;
// Exercises `captures`, `matches`, `capture` and `match_captures`.
#[test]
fn captures_and_matching() {
let mut m = LuaPattern::new("(one).+");
// Index 0 is the whole match, index 1 the first explicit capture.
assert_eq!(m.captures(" one two"), &["one two", "one"]);
let empty: &[&str] = &[];
assert_eq!(m.captures("four"), empty);
assert_eq!(m.matches("one dog"), true);
assert_eq!(m.matches("dog one "), true);
// `.+` needs at least one character after "one".
assert_eq!(m.matches("dog one"), false);
let text = "one dog";
let mut m = LuaPattern::new("^(%a+)");
assert_eq!(m.matches(text), true);
assert_eq!(&text[m.capture(1)], "one");
assert_eq!(m.matches(" one dog"), false);
// Re-run the match so the recorded spans refer to `text` again.
m.matches(text);
let captures = m.match_captures(text);
assert_eq!(captures.get(0), "one");
assert_eq!(captures.get(1), "one");
let mut m = LuaPattern::new("(%S+)%s*=%s*(.+)");
let cc = m.captures(" hello= bonzo dog");
assert_eq!(cc[0], "hello= bonzo dog");
assert_eq!(cc[1], "hello");
assert_eq!(cc[2], "bonzo dog");
}
// `match_maybe_2` returns exactly the two explicit captures.
#[test]
fn multiple_captures() {
let mut p = LuaPattern::new("%s*(%d+)%s+(%S+)");
let (int, rest) = p.match_maybe_2(" 233 hello dolly").unwrap();
assert_eq!(int, "233");
assert_eq!(rest, "hello");
}
// Iterates matches with and without an explicit capture, then via
// `gmatch_captures`.
#[test]
fn gmatch() {
let mut m = LuaPattern::new("%a+");
let mut iter = m.gmatch("one two three");
assert_eq!(iter.next(), Some("one"));
assert_eq!(iter.next(), Some("two"));
assert_eq!(iter.next(), Some("three"));
assert_eq!(iter.next(), None);
// With one explicit capture, `gmatch` yields that capture.
let mut m = LuaPattern::new("(%a+)");
let mut iter = m.gmatch("one two three");
assert_eq!(iter.next(), Some("one"));
assert_eq!(iter.next(), Some("two"));
assert_eq!(iter.next(), Some("three"));
assert_eq!(iter.next(), None);
let mut m = LuaPattern::new("(%a+)");
let mut iter = m.gmatch_captures("one two three");
assert_eq!(iter.next().unwrap().get(1), "one");
assert_eq!(iter.next().unwrap().get(1), "two");
assert_eq!(iter.next().unwrap().get(1), "three");
}
// Covers closure-based substitution (`gsub_with`) and template-based
// substitution (`gsub`).
#[test]
fn gsub() {
use std::collections::HashMap;
let mut m = LuaPattern::new("%$(%S+)");
let res = m.gsub_with("hello $dolly you're so $fine!", |cc| {
cc.get(1).to_uppercase()
});
assert_eq!(res, "hello DOLLY you're so FINE!");
// Look up each `$(name)` in a map, falling back to "?" for unknown names.
let mut map = HashMap::new();
map.insert("dolly", "baby");
map.insert("fine", "cool");
map.insert("good-looking", "pretty");
let mut m = LuaPattern::new("%$%((.-)%)");
let res = m.gsub_with(
"hello $(dolly) you're so $(fine) and $(good-looking)",
|cc| map.get(cc.get(1)).unwrap_or(&"?").to_string(),
);
assert_eq!(res, "hello baby you're so cool and pretty");
// An empty replacement deletes every match.
let mut m = LuaPattern::new("%s+");
let res = m.gsub("hello dolly you're so fine", "");
assert_eq!(res, "hellodollyyou'resofine");
// `%1` and `%2` in the template refer to the explicit captures.
let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
assert_eq!(res, "'2':a '3':b '4':c ");
}
// Each malformed pattern must be rejected with the listed message.
#[test]
fn bad_patterns() {
let bad = [
("bonzo %", "malformed pattern (ends with '%')"),
("bonzo (dog%(", "unfinished capture"),
("alles [%a%[", "malformed pattern (missing ']')"),
("bonzo (dog (cat)", "unfinished capture"),
("frodo %f[%A", "malformed pattern (missing ']')"),
("frodo (1) (2(3)%2)%1", "invalid capture index %2"),
];
for p in bad.iter() {
let res = LuaPattern::new_try(p.0);
if let Err(e) = res {
assert_eq!(e, PatternError::Pattern(p.1.into()));
} else {
// The pattern was accepted even though it is malformed.
panic!("false positive");
}
}
}
}