v_escape/
lib.rs

//! Crate v_escape provides a macro, `new!`, that defines a `struct` with
//! escaping functionality. This macro is optimized using simd by default,
//! but this can be altered using sub-attributes.
4//!
5//! # Quick start
//! In order to use v_escape you will have to call the `new!` macro
//! to create an escape `struct`. In this example, when using the macro
//! `new!(MyEscape; 62 -> "bar");` a new `struct` `MyEscape`
//! will be created that, every time its method `MyEscape::fmt` is called,
//! will replace all characters `">"` with `"bar"`.
11//!
12//! ```
13//! v_escape::new!(MyEscape; 62 -> "bar");
14//!
15//! # fn main() {
16//! # let s = "foo>bar";
17//! let escaped = escape(s);
18//!
19//! print!("{}", escaped);
20//! # }
21//! ```
22//!
23//! ## Pairs syntax
//! v_escape uses a simple syntax to replace characters
//! with their respective quotes. Each such tuple is named a `Pair`,
//! and several can be defined, referred to as `Pairs`. The syntax to define
//! `Pairs` consists of a character, followed
//! by the delimiter `->`, followed by the substitution quote
//! and the delimiter `,` (the last delimiter is optional):
30//!
31//!    `([character]->[quote], )*`
32//!
//! * `character` :   Character to substitute. Accepts `i8+` from `0` to `i8::MAX` and
//!                 accepts the following formats: decimal (49), hexadecimal (0x31),
//!                 octal (0o61) or character ('1').
//!                 Note: Numbers are read in ASCII: `'6' -> "foo"`
37//!
38//! * `quote` :   Characters that will replace `character`
39//!
40//! ```
41//! v_escape::new!(MyEscape; 49 -> "bar");
42//! # fn main() {
43//! assert_eq!(escape("foo 1").to_string(), "foo bar");
44//! # }
45//! ```
46//! ```
47//! v_escape::new!(MyEscape; 0x31 -> "bar");
48//! # fn main() {
49//! assert_eq!(escape("foo 1").to_string(), "foo bar");
50//! # }
51//! ```
52//! ```
53//! v_escape::new!(MyEscape; 0o61 -> "bar");
54//! # fn main() {
55//! assert_eq!(escape("foo 1").to_string(), "foo bar");
56//! # }
57//! ```
58//! ```
59//! v_escape::new!(MyEscape; '1' -> "bar");
60//! # fn main() {
61//! assert_eq!(escape("foo 1").to_string(), "foo bar");
62//! # }
63//! ```
64//!
65//! In the following example more than 16 pairs are given, this exceeds simd's
66//! boundary. If simd optimization is wanted, ranges must be enabled (default)
67//! or an error will be thrown. It is possible to not use ranges but simd
68//! optimization has to be disabled.
69//!
70//! ```
71//! v_escape::new!(
72//!     MyEscape;
73//!     62->"b",  60->"f",  'B'->"b",  65->"f",  0o67->"b",  '6'->"f",  68->"b",
74//!     71->"f",  72->"b",  73->"f",  74->"b",  75->"f",  76->"b",  77->"f",
75//!     78->"b",  79->"f",  0x1A->"f"
76//! );
77//! # fn main() {
78//! assert_eq!(escape("foo>bar<").to_string(), "foobbarf");
79//! # }
80//! ```
81//!
//! For debugging purposes, the sub-attribute `print` can be set to `true`
//! to print the generated code.
84//!
85//! ```
86//! v_escape::new!(MyEscape; 'o' -> "bar"; print = true);
87//! # fn main() {
88//! # assert_eq!(escape("foo").to_string(), "fbarbar");
89//! # }
90//! ```
91//!
92#![allow(unused_imports)]
93
94pub use buf_min::Buffer;
95pub use v_escape_derive::derive;
96
97#[macro_use]
98mod macros;
99#[macro_use]
100mod scalar;
101#[macro_use]
102mod ranges;
103#[macro_use]
104mod chars;
105
#[macro_export]
/// Generates struct `$name` with escaping functionality at `fmt`
///
/// # Input
///
/// * `$name`: Name of the escape struct, followed by `;`.
///
/// * Pairs: one or more `[character] -> [quote]` pairs separated by `,`.
///
/// * Optional boolean sub-attributes (simd, avx, ranges, print), written after the pairs
///   as `; key = value`.
///     * __simd__:  If true (by default), simd optimizations are enabled. When false,
///         no matter value of avx, `sse4.2` will be used,
///     * __avx__:   If true (by default), avx optimization are enabled. When false,
///         `sse2`(if `ranges=true` and `simd=true`) or `scalar`(if `simd=false`) will be used.
///     * __ranges__:   If true (by default), ranges optimizations are enabled. When false,
///         `sse4.2`(if `simd=true`) or `scalar`(if `simd=false`) will be used.
///     * __print__: If true (false by default), prints out generated code to console.
///
/// # Expansion
///
/// 1. Everything after the first `;` is handed, untouched, to `derive!`.
///
/// 2. `escape_new!($name)` then defines the struct `$name` (holding the borrowed
///    `bytes`) with its constructor `new` and `From<&'a str>`
///
/// 3. Implements trait `Display` for `$name` with escape functionality
///
/// 4. Implements the free functions `escape`, `escape_char`, `f_escape`,
///    `f_escape_char`, `b_escape` and `b_escape_char`
///
/// #### Example
///
/// ```
/// v_escape::new!(MyEscape; 'o' -> "bar");
///
/// # fn main() {
/// assert_eq!(escape("foobar").to_string(), "fbarbarbar");
/// # }
/// ```
///
macro_rules! new {
    // `$name`, a `;`, then the pairs and any sub-attributes as raw tokens
    ($name:ident; $($rest:tt)+) => {
        $crate::derive!($($rest)+);
        $crate::escape_new!($name);
    };
}
155
#[macro_export]
#[doc(hidden)]
/// Escape implementation
///
/// Expands to the public struct `$name`, its constructor `new`, the traits `From<&str>`
/// and `Display`, and the free functions `escape`, `escape_char`, `f_escape`,
/// `f_escape_char`, `b_escape` and `b_escape_char`.
///
/// The expansion calls `_escape`, `_f_escape`, `_b_escape` and items of a `chars` module;
/// all of them are resolved at the call site and must already exist there.
macro_rules! escape_new {
    ($name:ident) => {
        // Borrowed view over the bytes to escape
        pub struct $name<'a> {
            bytes: &'a [u8],
        }

        impl<'a> $name<'a> {
            // Wraps `bytes` without copying them
            #[inline]
            pub fn new(bytes: &[u8]) -> $name<'_> {
                $name { bytes: bytes }
            }

            // Forwards the wrapped bytes and `buf` to `_f_escape`
            #[inline]
            pub fn f_escape(&self, buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
                #[allow(unused_unsafe)]
                unsafe {
                    _f_escape(self.bytes, buf)
                }
            }
        }

        impl<'a> From<&'a str> for $name<'a> {
            #[inline]
            fn from(text: &'a str) -> Self {
                Self {
                    bytes: text.as_bytes(),
                }
            }
        }

        impl<'a> std::fmt::Display for $name<'a> {
            fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                #[allow(unused_unsafe)]
                unsafe {
                    _escape(self.bytes, fmt)
                }
            }
        }

        // Shorthand for building `$name` from a string slice
        #[inline]
        pub fn escape(s: &str) -> $name<'_> {
            s.into()
        }

        // Returns a `Display` value that forwards `c` to `chars::escape_char`
        #[inline]
        pub fn escape_char(c: char) -> impl std::fmt::Display {
            struct EscapeChar(char);

            impl std::fmt::Display for EscapeChar {
                fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                    let EscapeChar(inner) = *self;
                    chars::escape_char(inner, fmt)
                }
            }

            EscapeChar(c)
        }

        // Forwards `s` and `buf` to `_f_escape`
        #[inline]
        pub fn f_escape(s: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            #[allow(unused_unsafe)]
            unsafe {
                _f_escape(s, buf)
            }
        }

        // Forwards `c` and `buf` to `chars::f_escape_char`
        #[inline]
        pub fn f_escape_char(c: char, buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            #[allow(unused_unsafe)]
            unsafe {
                chars::f_escape_char(c, buf)
            }
        }

        /// Escape byte slice to `Buffer`
        ///
        /// # SIGILL
        /// Can produce **SIGILL** if compiled with `sse2` or `avx2` and executed on a CPU
        /// without them, because there is no way to build multiple static allocations
        /// by type and checking it at runtime is very expensive
        /// https://github.com/rust-lang/rust/issues/57775
        #[inline]
        pub fn b_escape<B: $crate::Buffer>(s: &[u8], buf: &mut B) {
            #[allow(unused_unsafe)]
            unsafe {
                _b_escape(s, buf)
            }
        }

        /// Escape char to `buf-min::Buffer`
        #[inline]
        pub fn b_escape_char<B: $crate::Buffer>(s: char, buf: &mut B) {
            #[allow(unused_unsafe)]
            unsafe {
                chars::b_escape_char(s, buf)
            }
        }
    };
}
259
#[macro_export]
#[doc(hidden)]
/// Defines `_escape`, the `Formatter`-based escape entry point
///
/// Arms:
///
/// * `(false, ..)`: `_escape` always forwards to `scalar::escape`.
/// * `(true, $avx)`: on `x86_64`, `_escape` picks an implementation on its first call
///   and stores the choice in a function-pointer slot that later calls read; on any
///   other architecture it is the same as `(false, ..)`.
/// * `(fn)`: the plain definition forwarding to `scalar::escape`.
/// * `(if true)` / `(if false)`: expression yielding the chosen function as `usize`,
///   with or without `ranges::avx::escape` among the candidates.
///
/// `scalar` and `ranges` are resolved at the call site.
macro_rules! cfg_escape {
    (false, $($t:tt)+) => {
        $crate::cfg_escape!(fn);
    };
    (true, $($t:tt)+) => {
        #[cfg(target_arch = "x86_64")]
        #[inline(always)]
        // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29
        fn _escape(bytes: &[u8], fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
            use std::mem;
            use std::sync::atomic::{AtomicUsize, Ordering};
            use std::fmt::{self, Formatter};
            // Starts out as `detect`; `detect` overwrites it with the selected function.
            static mut FN: fn(&[u8], &mut Formatter) -> fmt::Result = detect;

            fn detect(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
                let fun = $crate::cfg_escape!(if $($t)+);

                // SAFETY: only a raw pointer to `FN` is taken (never a reference to the
                // `static mut` itself) and every access goes through this atomic view.
                // Relies on `fn` pointers and `usize` sharing size and layout, as in
                // the memchr code linked above.
                let slot = unsafe { &*(std::ptr::addr_of!(FN) as *const AtomicUsize) };
                slot.store(fun, Ordering::Relaxed);
                // SAFETY: `fun` was produced by casting a function of exactly this
                // signature to `usize`.
                unsafe {
                    mem::transmute::<usize, fn(&[u8], &mut Formatter) -> fmt::Result>(fun)(
                        bytes, fmt,
                    )
                }
            }

            // SAFETY: same atomic view as in `detect`; the slot always holds the address
            // of `detect` or of a function stored by `detect`.
            unsafe {
                let slot = &*(std::ptr::addr_of!(FN) as *const AtomicUsize);
                let fun = slot.load(Ordering::Relaxed);
                mem::transmute::<usize, fn(&[u8], &mut Formatter) -> fmt::Result>(fun)(bytes, fmt)
            }
        }

        #[cfg(not(target_arch = "x86_64"))]
        $crate::cfg_escape!(fn);
    };
    (fn) => {
        #[inline(always)]
        fn _escape(bytes: &[u8], fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
            scalar::escape(bytes, fmt)
        }
    };
    (if true) => {
        if is_x86_feature_detected!("avx2") {
            ranges::avx::escape as usize
        } else if is_x86_feature_detected!("sse2") {
            ranges::sse::escape as usize
        } else {
            scalar::escape as usize
        }
    };
    (if false) => {
        if is_x86_feature_detected!("sse2") {
            ranges::sse::escape as usize
        } else {
            scalar::escape as usize
        }
    };
}
322
#[macro_export]
#[doc(hidden)]
/// Defines `_f_escape`, the escape entry point writing into a `MaybeUninit<u8>` slice
///
/// Arms:
///
/// * `(false, ..)`: `_f_escape` always forwards to `scalar::f_escape`.
/// * `(true, $avx)`: on `x86_64`, `_f_escape` picks an implementation on its first call
///   and stores the choice in a function-pointer slot that later calls read; on any
///   other architecture it is the same as `(false, ..)`.
/// * `(fn)`: the plain definition forwarding to `scalar::f_escape`.
/// * `(if true)` / `(if false)`: expression yielding the chosen function as `usize`,
///   with or without `ranges::avx::f_escape` among the candidates.
///
/// `scalar` and `ranges` are resolved at the call site.
macro_rules! cfg_escape_ptr {
    (false, $($t:tt)+) => {
        $crate::cfg_escape_ptr!(fn);
    };
    (true, $($t:tt)+) => {
        #[cfg(target_arch = "x86_64")]
        #[inline(always)]
        #[allow(unreachable_code)]
        // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29
        pub unsafe fn _f_escape(bytes: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            use std::mem;
            use std::sync::atomic::{AtomicUsize, Ordering};
            // Starts out as `detect`; `detect` overwrites it with the selected function.
            static mut FN: fn(&[u8], &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> = detect;

            fn detect(bytes: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
                let fun = $crate::cfg_escape_ptr!(if $($t)+);

                // SAFETY: only a raw pointer to `FN` is taken (never a reference to the
                // `static mut` itself) and every access goes through this atomic view.
                // Relies on `fn` pointers and `usize` sharing size and layout, as in
                // the memchr code linked above.
                let slot = unsafe { &*(std::ptr::addr_of!(FN) as *const AtomicUsize) };
                slot.store(fun, Ordering::Relaxed);
                // SAFETY: `fun` was produced by casting a function of exactly this
                // signature to `usize`.
                unsafe {
                    mem::transmute::<usize, fn(&[u8], &mut [std::mem::MaybeUninit<u8>]) -> Option<usize>>(fun)(
                        bytes, buf,
                    )
                }
            }

            // SAFETY: same atomic view as in `detect`; the slot always holds the address
            // of `detect` or of a function stored by `detect`.
            unsafe {
                let slot = &*(std::ptr::addr_of!(FN) as *const AtomicUsize);
                let fun = slot.load(Ordering::Relaxed);
                mem::transmute::<usize, fn(&[u8], &mut [std::mem::MaybeUninit<u8>]) -> Option<usize>>(fun)(bytes, buf)
            }
        }

        #[cfg(not(target_arch = "x86_64"))]
        $crate::cfg_escape_ptr!(fn);
    };
    (fn) => {
        #[inline(always)]
        pub unsafe fn _f_escape(bytes: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            scalar::f_escape(bytes, buf)
        }
    };
    (if true) => {
        if is_x86_feature_detected!("avx2") {
            ranges::avx::f_escape as usize
        } else if is_x86_feature_detected!("sse2") {
            ranges::sse::f_escape as usize
        } else {
            scalar::f_escape as usize
        }
    };
    (if false) => {
        if is_x86_feature_detected!("sse2") {
            ranges::sse::f_escape as usize
        } else {
            scalar::f_escape as usize
        }
    };
}
385
#[macro_export]
#[doc(hidden)]
/// Defines `_b_escape`, the escape entry point writing into a `Buffer`
///
/// Unlike the `Formatter` and pointer variants, the implementation is chosen at
/// compile time through `cfg` flags rather than detected at run time.
///
/// Arms:
///
/// * `(false, ..)`: `_b_escape` always forwards to `scalar::b_escape`.
/// * `(true, $avx)`: on `x86_64` without the `b_escape_nosimd` cfg, the body is the
///   `(if $avx, ..)` expression; in every other configuration it is the same as
///   `(false, ..)`.
/// * `(fn)`: the plain definition forwarding to `scalar::b_escape`.
/// * `(if true, bytes, buf)`: `ranges::avx::b_escape` under cfg `v_escape_avx`, else
///   `ranges::sse::b_escape` under cfg `v_escape_sse`, else `scalar::b_escape`.
/// * `(if false, bytes, buf)`: as above but never the avx candidate.
///
/// `scalar` and `ranges` are resolved at the call site.
macro_rules! cfg_escape_bytes {
    (false, $($t:tt)+) => {
        $crate::cfg_escape_bytes!(fn);
    };
    (true, $($t:tt)+) => {
        // This predicate must be the exact complement of the fallback's predicate
        // below, otherwise `_b_escape` is defined twice (or not at all).
        #[cfg(all(target_arch = "x86_64", not(b_escape_nosimd)))]
        #[inline(always)]
        pub unsafe fn _b_escape<B: $crate::Buffer>(bytes: &[u8], buf: &mut B) {
            $crate::cfg_escape_bytes!(if $($t)+, bytes, buf)
        }

        #[cfg(not(all(target_arch = "x86_64", not(b_escape_nosimd))))]
        $crate::cfg_escape_bytes!(fn);
    };
    (fn) => {
        #[inline(always)]
        pub unsafe fn _b_escape<B: $crate::Buffer>(bytes: &[u8], buf: &mut B) {
            scalar::b_escape(bytes, buf)
        }
    };
    (if true, $bytes:ident, $buf:ident) => {{
        #[cfg(not(v_escape_avx))] {
            #[cfg(not(v_escape_sse))] {
                scalar::b_escape($bytes, $buf)
            }
            #[cfg(v_escape_sse)] {
                ranges::sse::b_escape($bytes, $buf)
            }
        }
        #[cfg(v_escape_avx)] {
            ranges::avx::b_escape($bytes, $buf)
        }
    }};
    (if false, $bytes:ident, $buf:ident) => {{
        #[cfg(not(v_escape_sse))] {
            scalar::b_escape($bytes, $buf)
        }
        #[cfg(v_escape_sse)] {
            ranges::sse::b_escape($bytes, $buf)
        }
    }};
}
431
// Compiled only under `cfg(doctest)`: attaches the README text as the documentation of
// an empty `extern "C"` block, presumably so that the README's code examples are
// exercised as doctests.
#[cfg(doctest)]
mod test_readme {
    // Emits an empty extern block documented with `$readme`
    macro_rules! external_doc_test {
        ($readme:expr) => {
            #[doc = $readme]
            extern "C" {}
        };
    }

    external_doc_test!(include_str!("../../README.md"));
}