lua_patterns/lib.rs
1//! This is a Rust binding to [Lua string patterns](https://www.lua.org/pil/20.2.html),
2//! using the original code from Lua 5.2.
3//!
4//! Although not regular expressions (they lack alternation) they are a powerful
5//! and lightweight way to process text. Please note that they are not
6//! UTF-8-aware, and in fact can process arbitrary binary data.
7//!
8//! `LuaPattern` can be created from a string _or_ a byte slice, and has
9//! methods which are similar to the original Lua API. Please see
10//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
11//! for more discussion.
12//!
13//! [LuaPattern](struct.LuaPattern.html) implements the public API.
14//!
15//! ## Examples
16//!
17//! ```rust
18//! extern crate lua_patterns;
19//! let mut m = lua_patterns::LuaPattern::new("one");
20//! let text = "hello one two";
21//! assert!(m.matches(text));
22//! let r = m.range();
23//! assert_eq!(r.start, 6);
24//! assert_eq!(r.end, 9);
25//! ```
26//!
27//! Collecting captures from a match:
28//!
29//! ```rust
30//! extern crate lua_patterns;
31//! let text = " hello one";
32//! let mut m = lua_patterns::LuaPattern::new("(%S+) one");
33//!
34//! // allocates a vector of captures
35//! let v = m.captures(text);
36//! assert_eq!(v, &["hello one","hello"]);
37//! let mut v = Vec::new();
38//! // writes captures into preallocated vector
39//! if m.capture_into(text,&mut v) {
40//! assert_eq!(v, &["hello one","hello"]);
41//! }
42//! ```
43
44use std::{ops, str};
45
46pub mod errors;
47use errors::*;
48mod luapat;
49use luapat::*;
50
/// Represents a Lua string pattern and the results of a match
///
/// The pattern itself is only borrowed (lifetime `'a`); the match results
/// are stored inline, so matching needs `&mut self`.
pub struct LuaPattern<'a> {
    // The raw pattern bytes, as handed to the constructor.
    patt: &'a [u8],
    // Byte ranges written by the matcher; index 0 is the whole match
    // (`range()` is defined as `capture(0)`).
    matches: [LuaMatch; LUA_MAXCAPTURES],
    // Value returned by the last `str_match` call; zero means "no match".
    n_match: usize,
}
57
58impl<'a> LuaPattern<'a> {
59 /// Maybe create a new Lua pattern from a slice of bytes
60 pub fn from_bytes_try(bytes: &'a [u8]) -> Result<LuaPattern<'a>, PatternError> {
61 str_check(bytes)?;
62 let matches = [LuaMatch { start: 0, end: 0 }; LUA_MAXCAPTURES];
63 Ok(LuaPattern {
64 patt: bytes,
65 matches: matches,
66 n_match: 0,
67 })
68 }
69
70 /// Maybe create a new Lua pattern from a string
71 pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>, PatternError> {
72 LuaPattern::from_bytes_try(patt.as_bytes())
73 }
74
75 /// Create a new Lua pattern from a string, panicking if bad
76 pub fn new(patt: &'a str) -> LuaPattern<'a> {
77 LuaPattern::new_try(patt).expect("bad pattern")
78 }
79
80 /// Create a new Lua pattern from a slice of bytes, panicking if bad
81 pub fn from_bytes(bytes: &'a [u8]) -> LuaPattern<'a> {
82 LuaPattern::from_bytes_try(bytes).expect("bad pattern")
83 }
84
85 /// Match a slice of bytes with a pattern
86 ///
87 /// ```
88 /// let patt = &[0xFE,0xEE,b'+',0xED];
89 /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
90 /// let bytes = &[0x00,0x01,0xFE,0xEE,0xEE,0xED,0xEF];
91 /// assert!(m.matches_bytes(bytes));
92 /// assert_eq!(&bytes[m.range()], &[0xFE,0xEE,0xEE,0xED]);
93 /// ```
94 pub fn matches_bytes(&mut self, s: &[u8]) -> bool {
95 self.n_match =
96 str_match(s, self.patt, &mut self.matches).expect("Should not fail - report as bug");
97 self.n_match > 0
98 }
99
100 /// Match a string with a pattern
101 ///
102 /// ```
103 /// let mut m = lua_patterns::LuaPattern::new("(%a+) one");
104 /// let text = " hello one two";
105 /// assert!(m.matches(text));
106 /// ```
107 pub fn matches(&mut self, text: &str) -> bool {
108 self.matches_bytes(text.as_bytes())
109 }
110
111 /// Match a string, returning first capture if successful
112 ///
113 /// ```
114 /// let mut m = lua_patterns::LuaPattern::new("OK%s+(%d+)");
115 /// let res = m.match_maybe("and that's OK 400 to you");
116 /// assert_eq!(res, Some("400"));
117 /// ```
118 pub fn match_maybe<'t>(&mut self, text: &'t str) -> Option<&'t str> {
119 if self.matches(text) {
120 Some(&text[self.first_capture()])
121 } else {
122 None
123 }
124 }
125
126 /// Match a string, returning first two explicit captures if successful
127 ///
128 /// ```
129 /// let mut p = lua_patterns::LuaPattern::new("%s*(%d+)%s+(%S+)");
130 /// let (int,rest) = p.match_maybe_2(" 233 hello dolly").unwrap();
131 /// assert_eq!(int,"233");
132 /// assert_eq!(rest,"hello");
133 /// ```
134 pub fn match_maybe_2<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str)> {
135 if self.matches(text) {
136 let cc = self.match_captures(text);
137 if cc.num_matches() != 3 {
138 return None;
139 }
140 Some((cc.get(1), cc.get(2)))
141 } else {
142 None
143 }
144 }
145
146 /// Match a string, returning first three explicit captures if successful
147 ///
148 /// ```
149 /// let mut p = lua_patterns::LuaPattern::new("(%d+)/(%d+)/(%d+)");
150 /// let (y,m,d) = p.match_maybe_3("2017/11/10").unwrap();
151 /// assert_eq!(y,"2017");
152 /// assert_eq!(m,"11");
153 /// assert_eq!(d,"10");
154 /// ```
155 pub fn match_maybe_3<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str, &'t str)> {
156 if self.matches(text) {
157 let cc = self.match_captures(text);
158 if cc.num_matches() != 4 {
159 return None;
160 }
161 Some((cc.get(1), cc.get(2), cc.get(3)))
162 } else {
163 None
164 }
165 }
166
167 /// Match a string, returning first four explicit captures if successful
168 ///
169 /// ```
170 /// let mut p = lua_patterns::LuaPattern::new("(%d+)/(%d+)/(%d+):(%S+)");
171 /// let (y,m,d,r) = p.match_maybe_4("2017/11/10:rest").unwrap();
172 /// assert_eq!(y,"2017");
173 /// assert_eq!(m,"11");
174 /// assert_eq!(d,"10");
175 /// assert_eq!(r,"rest");
176 /// ```
177 pub fn match_maybe_4<'t>(
178 &mut self,
179 text: &'t str,
180 ) -> Option<(&'t str, &'t str, &'t str, &'t str)> {
181 if self.matches(text) {
182 let cc = self.match_captures(text);
183 if cc.num_matches() != 5 {
184 return None;
185 }
186 Some((cc.get(1), cc.get(2), cc.get(3), cc.get(4)))
187 } else {
188 None
189 }
190 }
191
192 /// Match and collect all captures as a vector of string slices
193 ///
194 /// ```
195 /// let mut m = lua_patterns::LuaPattern::new("(one).+");
196 /// assert_eq!(m.captures(" one two"), &["one two","one"]);
197 /// ```
198 pub fn captures<'b>(&mut self, text: &'b str) -> Vec<&'b str> {
199 let mut res = Vec::new();
200 self.capture_into(text, &mut res);
201 res
202 }
203
204 /// A convenient way to access the captures with no allocation
205 ///
206 /// ```rust
207 /// let text = " hello one";
208 /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
209 /// if m.matches(text) {
210 /// let cc = m.match_captures(text);
211 /// assert_eq!(cc.get(0), "hello one");
212 /// assert_eq!(cc.get(1), "hello");
213 /// }
214 /// ```
215 /// The result is also an iterator over the captures:
216 /// ```rust
217 /// let text = " hello one";
218 /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
219 /// if m.matches(text) {
220 /// let mut iter = m.match_captures(text);
221 /// assert_eq!(iter.next(), Some("hello one"));
222 /// assert_eq!(iter.next(), Some("hello"));
223 /// }
224 /// ```
225 pub fn match_captures<'b, 'c>(&'c self, text: &'b str) -> Captures<'a, 'b, 'c> {
226 Captures::new(self, text)
227 }
228
229 /// Match and collect all captures into the provided vector.
230 ///
231 /// ```rust
232 /// let text = " hello one";
233 /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
234 /// let mut v = Vec::new();
235 /// if m.capture_into(text,&mut v) {
236 /// assert_eq!(v, &["hello one","hello"]);
237 /// }
238 /// ```
239 pub fn capture_into<'b>(&mut self, text: &'b str, vec: &mut Vec<&'b str>) -> bool {
240 self.matches(text);
241 vec.clear();
242 for i in 0..self.n_match {
243 vec.push(&text[self.capture(i)]);
244 }
245 self.n_match > 0
246 }
247
248 /// The full match (same as `capture(0)`)
249 pub fn range(&self) -> ops::Range<usize> {
250 self.capture(0)
251 }
252
253 /// Get the nth capture of the match.
254 ///
255 /// ```
256 /// let mut m = lua_patterns::LuaPattern::new("(%a+) one");
257 /// let text = " hello one two";
258 /// assert!(m.matches(text));
259 /// assert_eq!(m.capture(0),1..10);
260 /// assert_eq!(m.capture(1),1..6);
261 /// ```
262 pub fn capture(&self, i: usize) -> ops::Range<usize> {
263 ops::Range {
264 start: self.matches[i].start as usize,
265 end: self.matches[i].end as usize,
266 }
267 }
268
269 /// Get the 'first' capture of the match
270 ///
271 /// If there are no matches, this is the same as `range`,
272 /// otherwise it's `capture(1)`
273 pub fn first_capture(&self) -> ops::Range<usize> {
274 let idx = if self.n_match > 1 { 1 } else { 0 };
275 self.capture(idx)
276 }
277
278 /// An iterator over all matches in a string.
279 ///
280 /// The matches are returned as string slices; if there are no
281 /// captures the full match is used, otherwise the first capture.
282 /// That is, this example will also work with the pattern "(%S+)".
283 ///
284 /// ```
285 /// let mut m = lua_patterns::LuaPattern::new("%S+");
286 /// let split: Vec<_> = m.gmatch("dog cat leopard wolf").collect();
287 /// assert_eq!(split,&["dog","cat","leopard","wolf"]);
288 /// ```
289 pub fn gmatch<'b, 'c>(&'c mut self, text: &'b str) -> GMatch<'a, 'b, 'c> {
290 GMatch {
291 m: self,
292 text: text,
293 }
294 }
295
296 /// An iterator over all captures in a string.
297 ///
298 /// The matches are returned as captures; this is a _streaming_
299 /// iterator, so don't try to collect the captures directly; extract
300 /// the string slices using `get`.
301 ///
302 /// ```
303 /// let mut m = lua_patterns::LuaPattern::new("(%S)%S+");
304 /// let split: Vec<_> = m.gmatch_captures("dog cat leopard wolf")
305 /// .map(|cc| cc.get(1)).collect();
306 /// assert_eq!(split,&["d","c","l","w"]);
307 /// ```
308 pub fn gmatch_captures<'b, 'c>(&'c mut self, text: &'b str) -> GMatchCaptures<'a, 'b, 'c> {
309 GMatchCaptures {
310 m: self,
311 text: text,
312 }
313 }
314
315 /// An iterator over all matches in a slice of bytes.
316 ///
317 /// ```
318 /// let bytes = &[0xAA,0x01,0x01,0x03,0xBB,0x01,0x01,0x01];
319 /// let patt = &[0x01,b'+'];
320 /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
321 /// let mut iter = m.gmatch_bytes(bytes);
322 /// assert_eq!(iter.next().unwrap(), &[0x01,0x01]);
323 /// assert_eq!(iter.next().unwrap(), &[0x01,0x01,0x01]);
324 /// assert_eq!(iter.next(), None);
325 /// ```
326 pub fn gmatch_bytes<'b>(&'a mut self, bytes: &'b [u8]) -> GMatchBytes<'a, 'b> {
327 GMatchBytes {
328 m: self,
329 bytes: bytes,
330 }
331 }
332
333 /// Globally substitute all matches with a replacement
334 /// provided by a function of the captures.
335 ///
336 /// ```
337 /// let mut m = lua_patterns::LuaPattern::new("%$(%S+)");
338 /// let res = m.gsub_with("hello $dolly you're so $fine!",
339 /// |cc| cc.get(1).to_uppercase()
340 /// );
341 /// assert_eq!(res, "hello DOLLY you're so FINE!");
342 /// ```
343 pub fn gsub_with<F>(&mut self, text: &str, lookup: F) -> String
344 where
345 F: Fn(Captures) -> String,
346 {
347 let mut slice = text;
348 let mut res = String::new();
349 while self.matches(slice) {
350 // full range of match
351 let all = self.range();
352 // append everything up to match
353 res.push_str(&slice[0..all.start]);
354 let captures = Captures::new(self, slice);
355 let repl = lookup(captures);
356 res.push_str(&repl);
357 slice = &slice[all.end..];
358 }
359 res.push_str(slice);
360 res
361 }
362
363 /// Globally substitute all matches with a replacement string
364 ///
365 /// This string _may_ have capture references ("%0",..). Use "%%"
366 /// to represent "%". Plain strings like "" work just fine ;)
367 ///
368 /// ```
369 /// let mut m = lua_patterns::LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
370 /// let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
371 /// assert_eq!(res,"'2':a '3':b '4':c ");
372 /// ```
373 pub fn gsub(&mut self, text: &str, repl: &str) -> String {
374 String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes())).unwrap()
375 }
376
377 /// Globally substitute all matches with a replacement string
378 ///
379 /// There will be an error if the result is bad UTF-8 (consider using `gsub_bytes`)
380 pub fn gsub_checked(&mut self, text: &str, repl: &str) -> Result<String, PatternError> {
381 String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes()))
382 .map_err(|e| PatternError::Utf8(e))
383 }
384
385 /// Globally substitute all _byte_ matches with replacement bytes
386 ///
387 /// Like `gsub` the replacement bytes may contain b"%0" etc
388 ///
389 /// ```
390 /// let bytes = &[0xAA,0x01,0x02,0x03,0xBB];
391 /// let patt = &[0x01,0x02];
392 /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
393 /// let res = m.gsub_bytes(bytes,&[0xFF]);
394 /// assert_eq!(res, &[0xAA,0xFF,0x03,0xBB]);
395 /// ```
396 pub fn gsub_bytes(&mut self, text: &[u8], repl: &[u8]) -> Vec<u8> {
397 let repl = ByteSubst::gsub_patterns(repl);
398 let mut slice = text;
399 let mut res = Vec::new();
400 while self.matches_bytes(slice) {
401 let all = self.range();
402 let capture = &slice[0..all.start];
403 res.extend_from_slice(capture);
404 let captures = ByteCaptures {
405 m: self,
406 bytes: slice,
407 };
408 for r in &repl {
409 match r {
410 ByteSubst::Bytes(s) => res.extend_from_slice(s),
411 ByteSubst::Capture(i) => res.extend_from_slice(captures.get(*i)),
412 }
413 }
414 slice = &slice[all.end..];
415 }
416 res.extend_from_slice(slice);
417 res
418 }
419
420 /// Globally substitute all _byte_ matches with a replacement
421 /// provided by a function of the captures.
422 ///
423 /// ```
424 /// let bytes = &[0xAA,0x01,0x02,0x03,0xBB];
425 /// let patt = &[0x01,0x02];
426 /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
427 /// let res = m.gsub_bytes_with(bytes,|cc| vec![0xFF]);
428 /// assert_eq!(res, &[0xAA,0xFF,0x03,0xBB]);
429 /// ```
430 pub fn gsub_bytes_with<F>(&mut self, bytes: &[u8], lookup: F) -> Vec<u8>
431 where
432 F: Fn(ByteCaptures) -> Vec<u8>,
433 {
434 let mut slice = bytes;
435 let mut res = Vec::new();
436 while self.matches_bytes(slice) {
437 let all = self.range();
438 let capture = &slice[0..all.start];
439 res.extend_from_slice(capture);
440 let captures = ByteCaptures {
441 m: self,
442 bytes: slice,
443 };
444 let repl = lookup(captures);
445 res.extend(repl);
446 slice = &slice[all.end..];
447 }
448 res.extend_from_slice(slice);
449 res
450 }
451}
452
453#[derive(Debug)]
454pub enum ByteSubst {
455 Bytes(Vec<u8>),
456 Capture(usize),
457}
458
459impl ByteSubst {
460 fn new_bytes(bytes: &[u8]) -> Self {
461 Self::Bytes(bytes.to_vec())
462 }
463
464 pub fn gsub_patterns(repl: &[u8]) -> Vec<Self> {
465 let mut m = LuaPattern::new("%%([%%%d])");
466 let mut res = Vec::new();
467 let mut slice = repl;
468 while m.matches_bytes(slice) {
469 let all = m.range();
470 let before = &slice[0..all.start];
471 if before != b"" {
472 res.push(Self::new_bytes(before));
473 }
474 let capture = &slice[m.capture(1)];
475 if capture == b"%" {
476 // escaped literal '%'
477 res.push(Self::new_bytes(b"%"));
478 } else {
479 // has to be a digit
480 let index: usize = str::from_utf8(capture).unwrap().parse().unwrap();
481 res.push(Self::Capture(index));
482 }
483 slice = &slice[all.end..];
484 }
485 res.push(Self::new_bytes(slice));
486 res
487 }
488}
489
490/// Low-overhead convenient access to string match captures
491// note: there are three borrows going on here.
492// The lifetime 'a is for the _pattern_, the lifetime 'b is
493// for the _source string_, and 'c is for the reference to LuaPattern
494// And the LuaPattern reference cannot live longer than the pattern reference
495pub struct Captures<'a, 'b, 'c>
496where
497 'a: 'c,
498{
499 m: &'c LuaPattern<'a>,
500 text: &'b str,
501 i_match: usize,
502}
503
504impl<'a, 'b, 'c> Captures<'a, 'b, 'c> {
505 fn new(m: &'c LuaPattern<'a>, text: &'b str) -> Self {
506 Self {
507 i_match: 0,
508 m,
509 text,
510 }
511 }
512
513 /// get the capture as a string slice
514 pub fn get(&self, i: usize) -> &'b str {
515 &self.text[self.m.capture(i)]
516 }
517
518 /// number of matches
519 pub fn num_matches(&self) -> usize {
520 self.m.n_match
521 }
522}
523
524impl<'a, 'b, 'c> Iterator for Captures<'a, 'b, 'c> {
525 type Item = &'b str;
526
527 fn next(&mut self) -> Option<Self::Item> {
528 if self.i_match == self.num_matches() {
529 None
530 } else {
531 let res = Some(self.get(self.i_match));
532 self.i_match += 1;
533 res
534 }
535 }
536}
537
538/// Low-overhead convenient access to byte match captures
539pub struct ByteCaptures<'a, 'b> {
540 m: &'a LuaPattern<'a>,
541 bytes: &'b [u8],
542}
543
544impl<'a, 'b> ByteCaptures<'a, 'b> {
545 /// get the capture as a byte slice
546 pub fn get(&self, i: usize) -> &'b [u8] {
547 &self.bytes[self.m.capture(i)]
548 }
549
550 /// number of matches
551 pub fn num_matches(&self) -> usize {
552 self.m.n_match
553 }
554}
555
556/// Iterator for all string slices from `gmatch`
557// note lifetimes as for Captures above!
558pub struct GMatch<'a, 'b, 'c>
559where
560 'a: 'c,
561{
562 m: &'c mut LuaPattern<'a>,
563 text: &'b str,
564}
565
566impl<'a, 'b, 'c> Iterator for GMatch<'a, 'b, 'c> {
567 type Item = &'b str;
568
569 fn next(&mut self) -> Option<Self::Item> {
570 if !self.m.matches(self.text) {
571 None
572 } else {
573 let slice = &self.text[self.m.first_capture()];
574 self.text = &self.text[self.m.range().end..];
575 Some(slice)
576 }
577 }
578}
579
580/// Unsafe version of Captures, needed for gmatch_captures
581// It's unsafe because the lifetime only depends on the original
582// text, not the borrowed matches.
583pub struct CapturesUnsafe<'b> {
584 matches: *const LuaMatch,
585 text: &'b str,
586}
587
588impl<'b> CapturesUnsafe<'b> {
589 /// get the capture as a string slice
590 pub fn get(&self, i: usize) -> &'b str {
591 unsafe {
592 let p = self.matches.offset(i as isize);
593 let range = ops::Range {
594 start: (*p).start as usize,
595 end: (*p).end as usize,
596 };
597 &self.text[range]
598 }
599 }
600}
601
602/// Streaming iterator for all captures from `gmatch_captures`
603// lifetimes as for Captures above!
604// 'a is pattern, 'b is text, 'c is ref to LuaPattern
605pub struct GMatchCaptures<'a, 'b, 'c>
606where
607 'a: 'c,
608{
609 m: &'c mut LuaPattern<'a>,
610 text: &'b str,
611}
612
613impl<'a, 'b, 'c> Iterator for GMatchCaptures<'a, 'b, 'c>
614where
615 'a: 'c,
616{
617 type Item = CapturesUnsafe<'b>;
618
619 fn next(&mut self) -> Option<Self::Item> {
620 if !self.m.matches(self.text) {
621 None
622 } else {
623 let split = self.text.split_at(self.m.range().end);
624 self.text = split.1;
625 let match_ptr: *const LuaMatch = self.m.matches.as_ptr();
626 Some(CapturesUnsafe {
627 matches: match_ptr,
628 text: split.0,
629 })
630 }
631 }
632}
633
634/// Iterator for all byte slices from `gmatch_bytes`
635pub struct GMatchBytes<'a, 'b> {
636 m: &'a mut LuaPattern<'a>,
637 bytes: &'b [u8],
638}
639
640impl<'a, 'b> Iterator for GMatchBytes<'a, 'b> {
641 type Item = &'b [u8];
642
643 fn next(&mut self) -> Option<Self::Item> {
644 if !self.m.matches_bytes(self.bytes) {
645 None
646 } else {
647 let slice = &self.bytes[self.m.first_capture()];
648 self.bytes = &self.bytes[self.m.range().end..];
649 Some(slice)
650 }
651 }
652}
653
/// Build a byte Lua pattern, optionally escaping 'magic' characters
///
/// `Default` yields an empty builder, the same state `new` starts from.
#[derive(Debug, Clone, Default)]
pub struct LuaPatternBuilder {
    // Pattern bytes accumulated so far.
    bytes: Vec<u8>,
}
658
659impl LuaPatternBuilder {
660 /// Create a new Lua pattern builder
661 pub fn new() -> LuaPatternBuilder {
662 LuaPatternBuilder { bytes: Vec::new() }
663 }
664
665 /// Add unescaped characters from a string
666 ///
667 /// ```
668 /// let patt = lua_patterns::LuaPatternBuilder::new()
669 /// .text("(boo)")
670 /// .build();
671 /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "(boo)");
672 /// ```
673 pub fn text(&mut self, s: &str) -> &mut Self {
674 self.bytes.extend_from_slice(s.as_bytes());
675 self
676 }
677
678 /// Add unescaped characters from lines
679 ///
680 /// This looks for first non-whitespace run in each line,
681 /// useful for spreading patterns out and commmenting them.
682 /// Works with patterns that use '%s' religiously!
683 ///
684 /// ```
685 /// let patt = lua_patterns::LuaPatternBuilder::new()
686 /// .text_lines("
687 /// hello-dolly
688 /// you-are-fine # comment
689 /// cool
690 /// ")
691 /// .build();
692 /// assert_eq!(std::str::from_utf8(&patt).unwrap(),
693 /// "hello-dollyyou-are-finecool");
694 /// ```
695 pub fn text_lines(&mut self, lines: &str) -> &mut Self {
696 let mut text = String::new();
697 for line in lines.lines() {
698 if let Some(first) = line.split_whitespace().next() {
699 text.push_str(first);
700 }
701 }
702 self.text(&text)
703 }
704
705 /// Add escaped bytes from a slice
706 ///
707 /// ```
708 /// let patt = lua_patterns::LuaPatternBuilder::new()
709 /// .text("^")
710 /// .bytes(b"^") // magic character!
711 /// .build();
712 /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "^%^");
713 /// ```
714 pub fn bytes(&mut self, b: &[u8]) -> &mut Self {
715 let mut m = LuaPattern::new("[%-%.%+%[%]%(%)%$%^%%%?%*]");
716 let bb = m.gsub_bytes_with(b, |cc| {
717 let mut res = Vec::new();
718 res.push(b'%');
719 res.push(cc.get(0)[0]);
720 res
721 });
722 self.bytes.extend(bb);
723 self
724 }
725
726 /// Add escaped bytes from hex string
727 ///
728 /// This consists of adjacent pairs of hex digits.
729 ///
730 /// ```
731 /// let patt = lua_patterns::LuaPatternBuilder::new()
732 /// .text("^")
733 /// .bytes_as_hex("5E") // which is ASCII '^'
734 /// .build();
735 /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "^%^");
736 /// ```
737 pub fn bytes_as_hex(&mut self, bs: &str) -> &mut Self {
738 let bb = LuaPatternBuilder::hex_to_bytes(bs);
739 self.bytes(&bb)
740 }
741
742 /// Create the pattern
743 pub fn build(&mut self) -> Vec<u8> {
744 let mut v = Vec::new();
745 std::mem::swap(&mut self.bytes, &mut v);
746 v
747 }
748
749 /// Utility to create a vector of bytes from a hex string
750 ///
751 /// ```
752 /// let bb = lua_patterns::LuaPatternBuilder::hex_to_bytes("AEFE00FE");
753 /// assert_eq!(bb, &[0xAE,0xFE,0x00,0xFE]);
754 /// ```
755 pub fn hex_to_bytes(s: &str) -> Vec<u8> {
756 let mut m = LuaPattern::new("%x%x");
757 m.gmatch(s)
758 .map(|pair| u8::from_str_radix(pair, 16).unwrap())
759 .collect()
760 }
761
762 /// Utility to create a hex string from a slice of bytes
763 ///
764 /// ```
765 /// let hex = lua_patterns::LuaPatternBuilder::bytes_to_hex(&[0xAE,0xFE,0x00,0xFE]);
766 /// assert_eq!(hex,"AEFE00FE");
767 ///
768 /// ```
769 pub fn bytes_to_hex(s: &[u8]) -> String {
770 s.iter().map(|b| format!("{:02X}", b)).collect()
771 }
772}
773
774#[cfg(test)]
775mod tests {
776 use super::*;
777
    // Exercises `captures`, `matches`, `capture` and `match_captures`.
    #[test]
    fn captures_and_matching() {
        // whole match first, then the explicit capture; no match => empty Vec
        let mut m = LuaPattern::new("(one).+");
        assert_eq!(m.captures(" one two"), &["one two", "one"]);
        let empty: &[&str] = &[];
        assert_eq!(m.captures("four"), empty);

        // ".+" needs at least one character after "one"
        assert_eq!(m.matches("one dog"), true);
        assert_eq!(m.matches("dog one "), true);
        assert_eq!(m.matches("dog one"), false);

        // '^' anchors the pattern at the start of the text
        let text = "one dog";
        let mut m = LuaPattern::new("^(%a+)");
        assert_eq!(m.matches(text), true);
        assert_eq!(&text[m.capture(1)], "one");
        assert_eq!(m.matches(" one dog"), false);

        // captures without allocation
        m.matches(text);
        let captures = m.match_captures(text);
        assert_eq!(captures.get(0), "one");
        assert_eq!(captures.get(1), "one");

        let mut m = LuaPattern::new("(%S+)%s*=%s*(.+)");

        // captures as Vec
        let cc = m.captures(" hello= bonzo dog");
        assert_eq!(cc[0], "hello= bonzo dog");
        assert_eq!(cc[1], "hello");
        assert_eq!(cc[2], "bonzo dog");
    }
809
    // `match_maybe_2` returns both explicit captures as a tuple.
    #[test]
    fn multiple_captures() {
        let mut p = LuaPattern::new("%s*(%d+)%s+(%S+)");
        let (int, rest) = p.match_maybe_2(" 233 hello dolly").unwrap();
        assert_eq!(int, "233");
        assert_eq!(rest, "hello");
    }
817
    // `gmatch` yields the same slices with or without an explicit capture;
    // `gmatch_captures` exposes the captures of each successive match.
    #[test]
    fn gmatch() {
        // no captures: each item is the whole match
        let mut m = LuaPattern::new("%a+");
        let mut iter = m.gmatch("one two three");
        assert_eq!(iter.next(), Some("one"));
        assert_eq!(iter.next(), Some("two"));
        assert_eq!(iter.next(), Some("three"));
        assert_eq!(iter.next(), None);

        // one capture: each item is that capture
        let mut m = LuaPattern::new("(%a+)");
        let mut iter = m.gmatch("one two three");
        assert_eq!(iter.next(), Some("one"));
        assert_eq!(iter.next(), Some("two"));
        assert_eq!(iter.next(), Some("three"));
        assert_eq!(iter.next(), None);

        // streaming captures: read each item before advancing
        let mut m = LuaPattern::new("(%a+)");
        let mut iter = m.gmatch_captures("one two three");
        assert_eq!(iter.next().unwrap().get(1), "one");
        assert_eq!(iter.next().unwrap().get(1), "two");
        assert_eq!(iter.next().unwrap().get(1), "three");
    }
840
    // Covers `gsub_with` (closure replacement) and `gsub` (replacement
    // string with and without capture references).
    #[test]
    fn gsub() {
        use std::collections::HashMap;

        // replacement computed from the first capture
        let mut m = LuaPattern::new("%$(%S+)");
        let res = m.gsub_with("hello $dolly you're so $fine!", |cc| {
            cc.get(1).to_uppercase()
        });
        assert_eq!(res, "hello DOLLY you're so FINE!");

        let mut map = HashMap::new();
        map.insert("dolly", "baby");
        map.insert("fine", "cool");
        map.insert("good-looking", "pretty");

        // replacement looked up in a table, "?" for unknown keys
        let mut m = LuaPattern::new("%$%((.-)%)");
        let res = m.gsub_with(
            "hello $(dolly) you're so $(fine) and $(good-looking)",
            |cc| map.get(cc.get(1)).unwrap_or(&"?").to_string(),
        );
        assert_eq!(res, "hello baby you're so cool and pretty");

        // an empty replacement deletes every match
        let mut m = LuaPattern::new("%s+");
        let res = m.gsub("hello dolly you're so fine", "");
        assert_eq!(res, "hellodollyyou'resofine");

        // "%1"/"%2" in the replacement refer to the captures
        let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
        let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
        assert_eq!(res, "'2':a '3':b '4':c ");
    }
871
    // Each malformed pattern must be rejected by `new_try` with the
    // expected `PatternError::Pattern` message.
    #[test]
    fn bad_patterns() {
        // (pattern, expected error message)
        let bad = [
            ("bonzo %", "malformed pattern (ends with '%')"),
            ("bonzo (dog%(", "unfinished capture"),
            ("alles [%a%[", "malformed pattern (missing ']')"),
            ("bonzo (dog (cat)", "unfinished capture"),
            ("frodo %f[%A", "malformed pattern (missing ']')"),
            ("frodo (1) (2(3)%2)%1", "invalid capture index %2"),
        ];
        for p in bad.iter() {
            let res = LuaPattern::new_try(p.0);
            if let Err(e) = res {
                assert_eq!(e, PatternError::Pattern(p.1.into()));
            } else {
                // a bad pattern was accepted
                panic!("false positive");
            }
        }
    }
891}