lua_patterns/
lib.rs

1//! This is a Rust binding to [Lua string patterns](https://www.lua.org/pil/20.2.html),
2//! using the original code from Lua 5.2.
3//!
4//! Although not regular expressions (they lack alternation) they are a powerful
5//! and lightweight way to process text. Please note that they are not
6//! UTF-8-aware, and in fact can process arbitrary binary data.
7//!
8//! `LuaPattern` can be created from a string _or_ a byte slice, and has
9//! methods which are similar to the original Lua API. Please see
10//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
11//! for more discussion.
12//!
13//! [LuaPattern](struct.LuaPattern.html) implements the public API.
14//!
15//! ## Examples
16//!
17//! ```rust
18//! extern crate lua_patterns;
19//! let mut m = lua_patterns::LuaPattern::new("one");
20//! let text = "hello one two";
21//! assert!(m.matches(text));
22//! let r = m.range();
23//! assert_eq!(r.start, 6);
24//! assert_eq!(r.end, 9);
25//! ```
26//!
27//! Collecting captures from a match:
28//!
29//! ```rust
30//! extern crate lua_patterns;
31//! let text = "  hello one";
32//! let mut m = lua_patterns::LuaPattern::new("(%S+) one");
33//!
34//! // allocates a vector of captures
35//! let v = m.captures(text);
36//! assert_eq!(v, &["hello one","hello"]);
37//! let mut v = Vec::new();
38//! // writes captures into preallocated vector
39//! if m.capture_into(text,&mut v) {
40//!     assert_eq!(v, &["hello one","hello"]);
41//! }
42//! ```
43
44use std::{ops, str};
45
46pub mod errors;
47use errors::*;
48mod luapat;
49use luapat::*;
50
51/// Represents a Lua string pattern and the results of a match
52pub struct LuaPattern<'a> {
53    patt: &'a [u8],
54    matches: [LuaMatch; LUA_MAXCAPTURES],
55    n_match: usize,
56}
57
58impl<'a> LuaPattern<'a> {
59    /// Maybe create a new Lua pattern from a slice of bytes
60    pub fn from_bytes_try(bytes: &'a [u8]) -> Result<LuaPattern<'a>, PatternError> {
61        str_check(bytes)?;
62        let matches = [LuaMatch { start: 0, end: 0 }; LUA_MAXCAPTURES];
63        Ok(LuaPattern {
64            patt: bytes,
65            matches: matches,
66            n_match: 0,
67        })
68    }
69
70    /// Maybe create a new Lua pattern from a string
71    pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>, PatternError> {
72        LuaPattern::from_bytes_try(patt.as_bytes())
73    }
74
75    /// Create a new Lua pattern from a string, panicking if bad
76    pub fn new(patt: &'a str) -> LuaPattern<'a> {
77        LuaPattern::new_try(patt).expect("bad pattern")
78    }
79
80    /// Create a new Lua pattern from a slice of bytes, panicking if bad
81    pub fn from_bytes(bytes: &'a [u8]) -> LuaPattern<'a> {
82        LuaPattern::from_bytes_try(bytes).expect("bad pattern")
83    }
84
85    /// Match a slice of bytes with a pattern
86    ///
87    /// ```
88    /// let patt = &[0xFE,0xEE,b'+',0xED];
89    /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
90    /// let bytes = &[0x00,0x01,0xFE,0xEE,0xEE,0xED,0xEF];
91    /// assert!(m.matches_bytes(bytes));
92    /// assert_eq!(&bytes[m.range()], &[0xFE,0xEE,0xEE,0xED]);
93    /// ```
94    pub fn matches_bytes(&mut self, s: &[u8]) -> bool {
95        self.n_match =
96            str_match(s, self.patt, &mut self.matches).expect("Should not fail - report as bug");
97        self.n_match > 0
98    }
99
100    /// Match a string with a pattern
101    ///
102    /// ```
103    /// let mut m = lua_patterns::LuaPattern::new("(%a+) one");
104    /// let text = " hello one two";
105    /// assert!(m.matches(text));
106    /// ```
107    pub fn matches(&mut self, text: &str) -> bool {
108        self.matches_bytes(text.as_bytes())
109    }
110
111    /// Match a string, returning first capture if successful
112    ///
113    /// ```
114    /// let mut m = lua_patterns::LuaPattern::new("OK%s+(%d+)");
115    /// let res = m.match_maybe("and that's OK 400 to you");
116    /// assert_eq!(res, Some("400"));
117    /// ```
118    pub fn match_maybe<'t>(&mut self, text: &'t str) -> Option<&'t str> {
119        if self.matches(text) {
120            Some(&text[self.first_capture()])
121        } else {
122            None
123        }
124    }
125
126    /// Match a string, returning first two explicit captures if successful
127    ///
128    /// ```
129    /// let mut p = lua_patterns::LuaPattern::new("%s*(%d+)%s+(%S+)");
130    /// let (int,rest) = p.match_maybe_2(" 233   hello dolly").unwrap();
131    /// assert_eq!(int,"233");
132    /// assert_eq!(rest,"hello");
133    /// ```
134    pub fn match_maybe_2<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str)> {
135        if self.matches(text) {
136            let cc = self.match_captures(text);
137            if cc.num_matches() != 3 {
138                return None;
139            }
140            Some((cc.get(1), cc.get(2)))
141        } else {
142            None
143        }
144    }
145
146    /// Match a string, returning first three explicit captures if successful
147    ///
148    /// ```
149    /// let mut p = lua_patterns::LuaPattern::new("(%d+)/(%d+)/(%d+)");
150    /// let (y,m,d) = p.match_maybe_3("2017/11/10").unwrap();
151    /// assert_eq!(y,"2017");
152    /// assert_eq!(m,"11");
153    /// assert_eq!(d,"10");
154    /// ```
155    pub fn match_maybe_3<'t>(&mut self, text: &'t str) -> Option<(&'t str, &'t str, &'t str)> {
156        if self.matches(text) {
157            let cc = self.match_captures(text);
158            if cc.num_matches() != 4 {
159                return None;
160            }
161            Some((cc.get(1), cc.get(2), cc.get(3)))
162        } else {
163            None
164        }
165    }
166
167    /// Match a string, returning first four explicit captures if successful
168    ///
169    /// ```
170    /// let mut p = lua_patterns::LuaPattern::new("(%d+)/(%d+)/(%d+):(%S+)");
171    /// let (y,m,d,r) = p.match_maybe_4("2017/11/10:rest").unwrap();
172    /// assert_eq!(y,"2017");
173    /// assert_eq!(m,"11");
174    /// assert_eq!(d,"10");
175    /// assert_eq!(r,"rest");
176    /// ```
177    pub fn match_maybe_4<'t>(
178        &mut self,
179        text: &'t str,
180    ) -> Option<(&'t str, &'t str, &'t str, &'t str)> {
181        if self.matches(text) {
182            let cc = self.match_captures(text);
183            if cc.num_matches() != 5 {
184                return None;
185            }
186            Some((cc.get(1), cc.get(2), cc.get(3), cc.get(4)))
187        } else {
188            None
189        }
190    }
191
192    /// Match and collect all captures as a vector of string slices
193    ///
194    /// ```
195    /// let mut m = lua_patterns::LuaPattern::new("(one).+");
196    /// assert_eq!(m.captures(" one two"), &["one two","one"]);
197    /// ```
198    pub fn captures<'b>(&mut self, text: &'b str) -> Vec<&'b str> {
199        let mut res = Vec::new();
200        self.capture_into(text, &mut res);
201        res
202    }
203
204    /// A convenient way to access the captures with no allocation
205    ///
206    /// ```rust
207    /// let text = "  hello one";
208    /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
209    /// if m.matches(text) {
210    ///     let cc = m.match_captures(text);
211    ///     assert_eq!(cc.get(0), "hello one");
212    ///     assert_eq!(cc.get(1), "hello");
213    /// }
214    /// ```
215    /// The result is also an iterator over the captures:
216    /// ```rust
217    /// let text = "  hello one";
218    /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
219    /// if m.matches(text) {
220    ///     let mut iter = m.match_captures(text);
221    ///     assert_eq!(iter.next(), Some("hello one"));
222    ///     assert_eq!(iter.next(), Some("hello"));
223    /// }
224    /// ```    
225    pub fn match_captures<'b, 'c>(&'c self, text: &'b str) -> Captures<'a, 'b, 'c> {
226        Captures::new(self, text)
227    }
228
229    /// Match and collect all captures into the provided vector.
230    ///
231    /// ```rust
232    /// let text = "  hello one";
233    /// let mut m = lua_patterns::LuaPattern::new("(%S+) one");
234    /// let mut v = Vec::new();
235    /// if m.capture_into(text,&mut v) {
236    ///     assert_eq!(v, &["hello one","hello"]);
237    /// }
238    /// ```
239    pub fn capture_into<'b>(&mut self, text: &'b str, vec: &mut Vec<&'b str>) -> bool {
240        self.matches(text);
241        vec.clear();
242        for i in 0..self.n_match {
243            vec.push(&text[self.capture(i)]);
244        }
245        self.n_match > 0
246    }
247
248    /// The full match (same as `capture(0)`)
249    pub fn range(&self) -> ops::Range<usize> {
250        self.capture(0)
251    }
252
253    /// Get the nth capture of the match.
254    ///
255    /// ```
256    /// let mut m = lua_patterns::LuaPattern::new("(%a+) one");
257    /// let text = " hello one two";
258    /// assert!(m.matches(text));
259    /// assert_eq!(m.capture(0),1..10);
260    /// assert_eq!(m.capture(1),1..6);
261    /// ```
262    pub fn capture(&self, i: usize) -> ops::Range<usize> {
263        ops::Range {
264            start: self.matches[i].start as usize,
265            end: self.matches[i].end as usize,
266        }
267    }
268
269    /// Get the 'first' capture of the match
270    ///
271    /// If there are no matches, this is the same as `range`,
272    /// otherwise it's `capture(1)`
273    pub fn first_capture(&self) -> ops::Range<usize> {
274        let idx = if self.n_match > 1 { 1 } else { 0 };
275        self.capture(idx)
276    }
277
278    /// An iterator over all matches in a string.
279    ///
280    /// The matches are returned as string slices; if there are no
281    /// captures the full match is used, otherwise the first capture.
282    /// That is, this example will also work with the pattern "(%S+)".
283    ///
284    /// ```
285    /// let mut m = lua_patterns::LuaPattern::new("%S+");
286    /// let split: Vec<_> = m.gmatch("dog  cat leopard wolf").collect();
287    /// assert_eq!(split,&["dog","cat","leopard","wolf"]);
288    /// ```
289    pub fn gmatch<'b, 'c>(&'c mut self, text: &'b str) -> GMatch<'a, 'b, 'c> {
290        GMatch {
291            m: self,
292            text: text,
293        }
294    }
295
296    /// An iterator over all captures in a string.
297    ///
298    /// The matches are returned as captures; this is a _streaming_
299    /// iterator, so don't try to collect the captures directly; extract
300    /// the string slices using `get`.
301    ///
302    /// ```
303    /// let mut m = lua_patterns::LuaPattern::new("(%S)%S+");
304    /// let split: Vec<_> = m.gmatch_captures("dog  cat leopard wolf")
305    ///       .map(|cc| cc.get(1)).collect();
306    /// assert_eq!(split,&["d","c","l","w"]);
307    /// ```
308    pub fn gmatch_captures<'b, 'c>(&'c mut self, text: &'b str) -> GMatchCaptures<'a, 'b, 'c> {
309        GMatchCaptures {
310            m: self,
311            text: text,
312        }
313    }
314
315    /// An iterator over all matches in a slice of bytes.
316    ///
317    /// ```
318    /// let bytes = &[0xAA,0x01,0x01,0x03,0xBB,0x01,0x01,0x01];
319    /// let patt = &[0x01,b'+'];
320    /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
321    /// let mut iter = m.gmatch_bytes(bytes);
322    /// assert_eq!(iter.next().unwrap(), &[0x01,0x01]);
323    /// assert_eq!(iter.next().unwrap(), &[0x01,0x01,0x01]);
324    /// assert_eq!(iter.next(), None);
325    /// ```
326    pub fn gmatch_bytes<'b>(&'a mut self, bytes: &'b [u8]) -> GMatchBytes<'a, 'b> {
327        GMatchBytes {
328            m: self,
329            bytes: bytes,
330        }
331    }
332
333    /// Globally substitute all matches with a replacement
334    /// provided by a function of the captures.
335    ///
336    /// ```
337    /// let mut m = lua_patterns::LuaPattern::new("%$(%S+)");
338    /// let res = m.gsub_with("hello $dolly you're so $fine!",
339    ///     |cc| cc.get(1).to_uppercase()
340    /// );
341    /// assert_eq!(res, "hello DOLLY you're so FINE!");
342    /// ```
343    pub fn gsub_with<F>(&mut self, text: &str, lookup: F) -> String
344    where
345        F: Fn(Captures) -> String,
346    {
347        let mut slice = text;
348        let mut res = String::new();
349        while self.matches(slice) {
350            // full range of match
351            let all = self.range();
352            // append everything up to match
353            res.push_str(&slice[0..all.start]);
354            let captures = Captures::new(self, slice);
355            let repl = lookup(captures);
356            res.push_str(&repl);
357            slice = &slice[all.end..];
358        }
359        res.push_str(slice);
360        res
361    }
362
363    /// Globally substitute all matches with a replacement string
364    ///
365    /// This string _may_ have capture references ("%0",..). Use "%%"
366    /// to represent "%". Plain strings like "" work just fine ;)
367    ///
368    /// ```
369    /// let mut m = lua_patterns::LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
370    /// let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
371    /// assert_eq!(res,"'2':a '3':b '4':c ");
372    /// ```
373    pub fn gsub(&mut self, text: &str, repl: &str) -> String {
374        String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes())).unwrap()
375    }
376
377    /// Globally substitute all matches with a replacement string
378    ///
379    /// There will be an error if the result is bad UTF-8 (consider using `gsub_bytes`)
380    pub fn gsub_checked(&mut self, text: &str, repl: &str) -> Result<String, PatternError> {
381        String::from_utf8(self.gsub_bytes(text.as_bytes(), repl.as_bytes()))
382            .map_err(|e| PatternError::Utf8(e))
383    }
384
385    /// Globally substitute all _byte_ matches with replacement bytes
386    ///
387    /// Like `gsub` the replacement bytes may contain b"%0" etc
388    ///
389    /// ```
390    /// let bytes = &[0xAA,0x01,0x02,0x03,0xBB];
391    /// let patt = &[0x01,0x02];
392    /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
393    /// let res = m.gsub_bytes(bytes,&[0xFF]);
394    /// assert_eq!(res, &[0xAA,0xFF,0x03,0xBB]);
395    /// ```
396    pub fn gsub_bytes(&mut self, text: &[u8], repl: &[u8]) -> Vec<u8> {
397        let repl = ByteSubst::gsub_patterns(repl);
398        let mut slice = text;
399        let mut res = Vec::new();
400        while self.matches_bytes(slice) {
401            let all = self.range();
402            let capture = &slice[0..all.start];
403            res.extend_from_slice(capture);
404            let captures = ByteCaptures {
405                m: self,
406                bytes: slice,
407            };
408            for r in &repl {
409                match r {
410                    ByteSubst::Bytes(s) => res.extend_from_slice(s),
411                    ByteSubst::Capture(i) => res.extend_from_slice(captures.get(*i)),
412                }
413            }
414            slice = &slice[all.end..];
415        }
416        res.extend_from_slice(slice);
417        res
418    }
419
420    /// Globally substitute all _byte_ matches with a replacement
421    /// provided by a function of the captures.
422    ///
423    /// ```
424    /// let bytes = &[0xAA,0x01,0x02,0x03,0xBB];
425    /// let patt = &[0x01,0x02];
426    /// let mut m = lua_patterns::LuaPattern::from_bytes(patt);
427    /// let res = m.gsub_bytes_with(bytes,|cc| vec![0xFF]);
428    /// assert_eq!(res, &[0xAA,0xFF,0x03,0xBB]);
429    /// ```
430    pub fn gsub_bytes_with<F>(&mut self, bytes: &[u8], lookup: F) -> Vec<u8>
431    where
432        F: Fn(ByteCaptures) -> Vec<u8>,
433    {
434        let mut slice = bytes;
435        let mut res = Vec::new();
436        while self.matches_bytes(slice) {
437            let all = self.range();
438            let capture = &slice[0..all.start];
439            res.extend_from_slice(capture);
440            let captures = ByteCaptures {
441                m: self,
442                bytes: slice,
443            };
444            let repl = lookup(captures);
445            res.extend(repl);
446            slice = &slice[all.end..];
447        }
448        res.extend_from_slice(slice);
449        res
450    }
451}
452
453#[derive(Debug)]
454pub enum ByteSubst {
455    Bytes(Vec<u8>),
456    Capture(usize),
457}
458
459impl ByteSubst {
460    fn new_bytes(bytes: &[u8]) -> Self {
461        Self::Bytes(bytes.to_vec())
462    }
463
464    pub fn gsub_patterns(repl: &[u8]) -> Vec<Self> {
465        let mut m = LuaPattern::new("%%([%%%d])");
466        let mut res = Vec::new();
467        let mut slice = repl;
468        while m.matches_bytes(slice) {
469            let all = m.range();
470            let before = &slice[0..all.start];
471            if before != b"" {
472                res.push(Self::new_bytes(before));
473            }
474            let capture = &slice[m.capture(1)];
475            if capture == b"%" {
476                // escaped literal '%'
477                res.push(Self::new_bytes(b"%"));
478            } else {
479                // has to be a digit
480                let index: usize = str::from_utf8(capture).unwrap().parse().unwrap();
481                res.push(Self::Capture(index));
482            }
483            slice = &slice[all.end..];
484        }
485        res.push(Self::new_bytes(slice));
486        res
487    }
488}
489
490/// Low-overhead convenient access to string match captures
491// note: there are three borrows going on here.
492// The lifetime 'a is for the _pattern_, the lifetime 'b is
493// for the _source string_, and 'c is for the reference to LuaPattern
494// And the LuaPattern reference cannot live longer than the pattern reference
495pub struct Captures<'a, 'b, 'c>
496where
497    'a: 'c,
498{
499    m: &'c LuaPattern<'a>,
500    text: &'b str,
501    i_match: usize,
502}
503
504impl<'a, 'b, 'c> Captures<'a, 'b, 'c> {
505    fn new(m: &'c LuaPattern<'a>, text: &'b str) -> Self {
506        Self {
507            i_match: 0,
508            m,
509            text,
510        }
511    }
512
513    /// get the capture as a string slice
514    pub fn get(&self, i: usize) -> &'b str {
515        &self.text[self.m.capture(i)]
516    }
517
518    /// number of matches
519    pub fn num_matches(&self) -> usize {
520        self.m.n_match
521    }
522}
523
524impl<'a, 'b, 'c> Iterator for Captures<'a, 'b, 'c> {
525    type Item = &'b str;
526
527    fn next(&mut self) -> Option<Self::Item> {
528        if self.i_match == self.num_matches() {
529            None
530        } else {
531            let res = Some(self.get(self.i_match));
532            self.i_match += 1;
533            res
534        }
535    }
536}
537
538/// Low-overhead convenient access to byte match captures
539pub struct ByteCaptures<'a, 'b> {
540    m: &'a LuaPattern<'a>,
541    bytes: &'b [u8],
542}
543
544impl<'a, 'b> ByteCaptures<'a, 'b> {
545    /// get the capture as a byte slice
546    pub fn get(&self, i: usize) -> &'b [u8] {
547        &self.bytes[self.m.capture(i)]
548    }
549
550    /// number of matches
551    pub fn num_matches(&self) -> usize {
552        self.m.n_match
553    }
554}
555
556/// Iterator for all string slices from `gmatch`
557// note lifetimes as for Captures above!
558pub struct GMatch<'a, 'b, 'c>
559where
560    'a: 'c,
561{
562    m: &'c mut LuaPattern<'a>,
563    text: &'b str,
564}
565
566impl<'a, 'b, 'c> Iterator for GMatch<'a, 'b, 'c> {
567    type Item = &'b str;
568
569    fn next(&mut self) -> Option<Self::Item> {
570        if !self.m.matches(self.text) {
571            None
572        } else {
573            let slice = &self.text[self.m.first_capture()];
574            self.text = &self.text[self.m.range().end..];
575            Some(slice)
576        }
577    }
578}
579
580/// Unsafe version of Captures, needed for gmatch_captures
581// It's unsafe because the lifetime only depends on the original
582// text, not the borrowed matches.
583pub struct CapturesUnsafe<'b> {
584    matches: *const LuaMatch,
585    text: &'b str,
586}
587
588impl<'b> CapturesUnsafe<'b> {
589    /// get the capture as a string slice
590    pub fn get(&self, i: usize) -> &'b str {
591        unsafe {
592            let p = self.matches.offset(i as isize);
593            let range = ops::Range {
594                start: (*p).start as usize,
595                end: (*p).end as usize,
596            };
597            &self.text[range]
598        }
599    }
600}
601
602/// Streaming iterator for all captures from `gmatch_captures`
603// lifetimes as for Captures above!
604// 'a is pattern, 'b is text, 'c is ref to LuaPattern
605pub struct GMatchCaptures<'a, 'b, 'c>
606where
607    'a: 'c,
608{
609    m: &'c mut LuaPattern<'a>,
610    text: &'b str,
611}
612
613impl<'a, 'b, 'c> Iterator for GMatchCaptures<'a, 'b, 'c>
614where
615    'a: 'c,
616{
617    type Item = CapturesUnsafe<'b>;
618
619    fn next(&mut self) -> Option<Self::Item> {
620        if !self.m.matches(self.text) {
621            None
622        } else {
623            let split = self.text.split_at(self.m.range().end);
624            self.text = split.1;
625            let match_ptr: *const LuaMatch = self.m.matches.as_ptr();
626            Some(CapturesUnsafe {
627                matches: match_ptr,
628                text: split.0,
629            })
630        }
631    }
632}
633
634/// Iterator for all byte slices from `gmatch_bytes`
635pub struct GMatchBytes<'a, 'b> {
636    m: &'a mut LuaPattern<'a>,
637    bytes: &'b [u8],
638}
639
640impl<'a, 'b> Iterator for GMatchBytes<'a, 'b> {
641    type Item = &'b [u8];
642
643    fn next(&mut self) -> Option<Self::Item> {
644        if !self.m.matches_bytes(self.bytes) {
645            None
646        } else {
647            let slice = &self.bytes[self.m.first_capture()];
648            self.bytes = &self.bytes[self.m.range().end..];
649            Some(slice)
650        }
651    }
652}
653
/// Build a byte Lua pattern, optionally escaping 'magic' characters
///
/// `LuaPatternBuilder::default()` is an empty builder.
#[derive(Debug, Clone, Default)]
pub struct LuaPatternBuilder {
    /// The pattern bytes accumulated so far
    bytes: Vec<u8>,
}
658
659impl LuaPatternBuilder {
660    /// Create a new Lua pattern builder
661    pub fn new() -> LuaPatternBuilder {
662        LuaPatternBuilder { bytes: Vec::new() }
663    }
664
665    /// Add unescaped characters from a string
666    ///
667    /// ```
668    /// let patt = lua_patterns::LuaPatternBuilder::new()
669    ///     .text("(boo)")
670    ///     .build();
671    /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "(boo)");
672    /// ```
673    pub fn text(&mut self, s: &str) -> &mut Self {
674        self.bytes.extend_from_slice(s.as_bytes());
675        self
676    }
677
678    /// Add unescaped characters from lines
679    ///
680    /// This looks for first non-whitespace run in each line,
681    /// useful for spreading patterns out and commmenting them.
682    /// Works with patterns that use '%s' religiously!
683    ///
684    /// ```
685    /// let patt = lua_patterns::LuaPatternBuilder::new()
686    ///     .text_lines("
687    ///       hello-dolly
688    ///       you-are-fine  # comment
689    ///       cool
690    ///      ")
691    ///     .build();
692    /// assert_eq!(std::str::from_utf8(&patt).unwrap(),
693    ///   "hello-dollyyou-are-finecool");
694    /// ```
695    pub fn text_lines(&mut self, lines: &str) -> &mut Self {
696        let mut text = String::new();
697        for line in lines.lines() {
698            if let Some(first) = line.split_whitespace().next() {
699                text.push_str(first);
700            }
701        }
702        self.text(&text)
703    }
704
705    /// Add escaped bytes from a slice
706    ///
707    /// ```
708    /// let patt = lua_patterns::LuaPatternBuilder::new()
709    ///     .text("^")
710    ///     .bytes(b"^") // magic character!
711    ///     .build();
712    /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "^%^");
713    /// ```
714    pub fn bytes(&mut self, b: &[u8]) -> &mut Self {
715        let mut m = LuaPattern::new("[%-%.%+%[%]%(%)%$%^%%%?%*]");
716        let bb = m.gsub_bytes_with(b, |cc| {
717            let mut res = Vec::new();
718            res.push(b'%');
719            res.push(cc.get(0)[0]);
720            res
721        });
722        self.bytes.extend(bb);
723        self
724    }
725
726    /// Add escaped bytes from hex string
727    ///
728    /// This consists of adjacent pairs of hex digits.
729    ///
730    /// ```
731    /// let patt = lua_patterns::LuaPatternBuilder::new()
732    ///     .text("^")
733    ///     .bytes_as_hex("5E") // which is ASCII '^'
734    ///     .build();
735    /// assert_eq!(std::str::from_utf8(&patt).unwrap(), "^%^");
736    /// ```
737    pub fn bytes_as_hex(&mut self, bs: &str) -> &mut Self {
738        let bb = LuaPatternBuilder::hex_to_bytes(bs);
739        self.bytes(&bb)
740    }
741
742    /// Create the pattern
743    pub fn build(&mut self) -> Vec<u8> {
744        let mut v = Vec::new();
745        std::mem::swap(&mut self.bytes, &mut v);
746        v
747    }
748
749    /// Utility to create a vector of bytes from a hex string
750    ///
751    /// ```
752    /// let bb = lua_patterns::LuaPatternBuilder::hex_to_bytes("AEFE00FE");
753    /// assert_eq!(bb, &[0xAE,0xFE,0x00,0xFE]);
754    /// ```
755    pub fn hex_to_bytes(s: &str) -> Vec<u8> {
756        let mut m = LuaPattern::new("%x%x");
757        m.gmatch(s)
758            .map(|pair| u8::from_str_radix(pair, 16).unwrap())
759            .collect()
760    }
761
762    /// Utility to create a hex string from a slice of bytes
763    ///
764    /// ```
765    /// let hex = lua_patterns::LuaPatternBuilder::bytes_to_hex(&[0xAE,0xFE,0x00,0xFE]);
766    /// assert_eq!(hex,"AEFE00FE");
767    ///
768    /// ```
769    pub fn bytes_to_hex(s: &[u8]) -> String {
770        s.iter().map(|b| format!("{:02X}", b)).collect()
771    }
772}
773
#[cfg(test)]
mod tests {
    use super::*;

    // Basic matching, capture ranges and the allocating/non-allocating
    // ways of reading captures.
    #[test]
    fn captures_and_matching() {
        let mut one = LuaPattern::new("(one).+");
        assert_eq!(one.captures(" one two"), &["one two", "one"]);
        let nothing: &[&str] = &[];
        assert_eq!(one.captures("four"), nothing);

        assert!(one.matches("one dog"));
        assert!(one.matches("dog one "));
        // ".+" needs at least one character after "one"
        assert!(!one.matches("dog one"));

        let text = "one dog";
        let mut word = LuaPattern::new("^(%a+)");
        assert!(word.matches(text));
        assert_eq!(&text[word.capture(1)], "one");
        assert!(!word.matches(" one dog"));

        // captures without allocation
        word.matches(text);
        let view = word.match_captures(text);
        assert_eq!(view.get(0), "one");
        assert_eq!(view.get(1), "one");

        //  captures as Vec
        let mut assign = LuaPattern::new("(%S+)%s*=%s*(.+)");
        let parts = assign.captures(" hello= bonzo dog");
        assert_eq!(parts[0], "hello= bonzo dog");
        assert_eq!(parts[1], "hello");
        assert_eq!(parts[2], "bonzo dog");
    }

    // A pattern with exactly two captures through `match_maybe_2`.
    #[test]
    fn multiple_captures() {
        let mut p = LuaPattern::new("%s*(%d+)%s+(%S+)");
        let found = p.match_maybe_2(" 233   hello dolly");
        let (int, rest) = found.unwrap();
        assert_eq!(int, "233");
        assert_eq!(rest, "hello");
    }

    // `gmatch` with and without an explicit capture, and `gmatch_captures`.
    #[test]
    fn gmatch() {
        let mut plain = LuaPattern::new("%a+");
        let mut words = plain.gmatch("one two three");
        assert_eq!(words.next(), Some("one"));
        assert_eq!(words.next(), Some("two"));
        assert_eq!(words.next(), Some("three"));
        assert_eq!(words.next(), None);

        let mut captured = LuaPattern::new("(%a+)");
        let mut words = captured.gmatch("one two three");
        assert_eq!(words.next(), Some("one"));
        assert_eq!(words.next(), Some("two"));
        assert_eq!(words.next(), Some("three"));
        assert_eq!(words.next(), None);

        let mut captured = LuaPattern::new("(%a+)");
        let mut caps = captured.gmatch_captures("one two three");
        assert_eq!(caps.next().unwrap().get(1), "one");
        assert_eq!(caps.next().unwrap().get(1), "two");
        assert_eq!(caps.next().unwrap().get(1), "three");
    }

    // Substitution through a closure and through a replacement string.
    #[test]
    fn gsub() {
        use std::collections::HashMap;

        let mut dollar = LuaPattern::new("%$(%S+)");
        let shouted = dollar.gsub_with("hello $dolly you're so $fine!", |cc| {
            cc.get(1).to_uppercase()
        });
        assert_eq!(shouted, "hello DOLLY you're so FINE!");

        let map: HashMap<&str, &str> = [
            ("dolly", "baby"),
            ("fine", "cool"),
            ("good-looking", "pretty"),
        ]
        .iter()
        .cloned()
        .collect();

        let mut braced = LuaPattern::new("%$%((.-)%)");
        let looked_up = braced.gsub_with(
            "hello $(dolly) you're so $(fine) and $(good-looking)",
            |cc| map.get(cc.get(1)).unwrap_or(&"?").to_string(),
        );
        assert_eq!(looked_up, "hello baby you're so cool and pretty");

        let mut spaces = LuaPattern::new("%s+");
        let squeezed = spaces.gsub("hello dolly you're so fine", "");
        assert_eq!(squeezed, "hellodollyyou'resofine");

        let mut assign = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
        let swapped = assign.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
        assert_eq!(swapped, "'2':a '3':b '4':c ");
    }

    // Malformed patterns must be rejected with the expected message.
    #[test]
    fn bad_patterns() {
        let cases = [
            ("bonzo %", "malformed pattern (ends with '%')"),
            ("bonzo (dog%(", "unfinished capture"),
            ("alles [%a%[", "malformed pattern (missing ']')"),
            ("bonzo (dog (cat)", "unfinished capture"),
            ("frodo %f[%A", "malformed pattern (missing ']')"),
            ("frodo (1) (2(3)%2)%1", "invalid capture index %2"),
        ];
        for &(patt, expected) in cases.iter() {
            match LuaPattern::new_try(patt) {
                Err(e) => assert_eq!(e, PatternError::Pattern(expected.into())),
                Ok(_) => panic!("false positive"),
            }
        }
    }
}
891}