v_escape/lib.rs
//! Crate v_escape provides a macro, `new!`, that defines a `struct` with
//! escaping functionality. The macro is optimized using simd by default,
//! but this can be altered using sub-attributes.
4//!
5//! # Quick start
//! In order to use v_escape you will have to call the `new!` macro
//! to create an escape `struct`. In this example, when using the macro
//! `new!(MyEscape; 62 -> "bar");` a new `struct` `MyEscape`
//! will be created that, every time its method `MyEscape::fmt` is called,
//! will replace all characters `">"` with `"bar"`.
11//!
12//! ```
13//! v_escape::new!(MyEscape; 62 -> "bar");
14//!
15//! # fn main() {
16//! # let s = "foo>bar";
17//! let escaped = escape(s);
18//!
19//! print!("{}", escaped);
20//! # }
21//! ```
22//!
23//! ## Pairs syntax
//! v_escape uses a simple syntax to replace characters
//! with their respective quotes. The tuple is named `Pair`,
//! and several can be defined, referred to as `Pairs`. The syntax to define
//! `Pairs` consists of a character, followed
//! by the delimiter `->`, followed by the substitution quote
//! and the delimiter `,` (the last delimiter is optional):
30//!
31//! `([character]->[quote], )*`
32//!
//! * `character` : Character to substitute. Accepts `i8+` from `0` to `i8::MAX` and
//! accepts the following formats: decimal (49), hexadecimal (0x31),
//! octal (0o61) or character ('1').
//! Note: Numbers are read in ASCII: `'6' -> "foo"`
37//!
38//! * `quote` : Characters that will replace `character`
39//!
40//! ```
41//! v_escape::new!(MyEscape; 49 -> "bar");
42//! # fn main() {
43//! assert_eq!(escape("foo 1").to_string(), "foo bar");
44//! # }
45//! ```
46//! ```
47//! v_escape::new!(MyEscape; 0x31 -> "bar");
48//! # fn main() {
49//! assert_eq!(escape("foo 1").to_string(), "foo bar");
50//! # }
51//! ```
52//! ```
53//! v_escape::new!(MyEscape; 0o61 -> "bar");
54//! # fn main() {
55//! assert_eq!(escape("foo 1").to_string(), "foo bar");
56//! # }
57//! ```
58//! ```
59//! v_escape::new!(MyEscape; '1' -> "bar");
60//! # fn main() {
61//! assert_eq!(escape("foo 1").to_string(), "foo bar");
62//! # }
63//! ```
64//!
65//! In the following example more than 16 pairs are given, this exceeds simd's
66//! boundary. If simd optimization is wanted, ranges must be enabled (default)
67//! or an error will be thrown. It is possible to not use ranges but simd
68//! optimization has to be disabled.
69//!
70//! ```
71//! v_escape::new!(
72//! MyEscape;
73//! 62->"b", 60->"f", 'B'->"b", 65->"f", 0o67->"b", '6'->"f", 68->"b",
74//! 71->"f", 72->"b", 73->"f", 74->"b", 75->"f", 76->"b", 77->"f",
75//! 78->"b", 79->"f", 0x1A->"f"
76//! );
77//! # fn main() {
78//! assert_eq!(escape("foo>bar<").to_string(), "foobbarf");
79//! # }
80//! ```
81//!
//! For debugging purposes, the sub-attribute `print` can be set to `true`
//! to print the generated code
84//!
85//! ```
86//! v_escape::new!(MyEscape; 'o' -> "bar"; print = true);
87//! # fn main() {
88//! # assert_eq!(escape("foo").to_string(), "fbarbar");
89//! # }
90//! ```
91//!
92#![allow(unused_imports)]
93
94pub use buf_min::Buffer;
95pub use v_escape_derive::derive;
96
97#[macro_use]
98mod macros;
99#[macro_use]
100mod scalar;
101#[macro_use]
102mod ranges;
103#[macro_use]
104mod chars;
105
/// Generates a struct `$name` whose `Display::fmt` escapes the bytes it wraps.
///
/// # Input
///
/// * `$name`: identifier of the escape struct to generate.
///
/// * Everything after the first `;` is forwarded untouched to
///   `v_escape::derive!`. It holds the pairs, written
///   `[character] -> [quote], [character] -> [quote]`, optionally followed by
///   `;` and sub-attributes such as `print = true`.
///
/// Sub-attributes (optional booleans). NOTE(review): they are interpreted by
/// `v_escape_derive`, not by this file; the descriptions below are carried
/// over from the previous documentation and should be confirmed there.
///
/// * __simd__: If true (by default), simd optimizations are enabled. When false,
///   no matter value of avx, `sse4.2` will be used.
/// * __avx__: If true (by default), avx optimizations are enabled. When false,
///   `sse2` (if `ranges=true` and `simd=true`) or `scalar` (if `simd=false`) will be used.
/// * __ranges__: If true (by default), ranges optimizations are enabled. When false,
///   `sse4.2` (if `simd=true`) or `scalar` (if `simd=false`) will be used.
/// * __print__: If true (false by default), prints out generated code to console.
///
/// # Output
///
/// The expansion is `derive!(..)` followed by `escape_new!($name)`, which:
///
/// 1. Defines the struct `$name<'a>` holding the borrowed `bytes`
///
/// 2. Implements for `$name` the constructors `new` and `From<&'a str>`
///
/// 3. Implements trait `Display` for `$name` with escape functionality
///
/// 4. Defines the function `escape(&str) -> $name` together with the
///    `escape_char`, `f_escape`, `f_escape_char`, `b_escape` and
///    `b_escape_char` helpers
///
/// #### Example
///
/// ```
/// v_escape::new!(MyEscape; 'o' -> "bar");
///
/// # fn main() {
/// assert_eq!(escape("foobar").to_string(), "fbarbarbar");
/// # }
/// ```
///
#[macro_export]
macro_rules! new {
    // `Name; pairs [; sub-attributes]`: the tail after the first `;` goes to `derive!` as is
    ($name:ident; $($pairs:tt)+) => {
        $crate::derive!($($pairs)+);
        $crate::escape_new!($name);
    };
}
155
#[macro_export]
#[doc(hidden)]
/// Escape implementation
///
/// Expands to the struct `$name` (constructor `new`, method `f_escape`, and the
/// traits `From<&str>` and `Display`) plus the free functions `escape`,
/// `escape_char`, `f_escape`, `f_escape_char`, `b_escape` and `b_escape_char`.
///
/// The expansion calls `_escape`, `_f_escape`, `_b_escape` and the `chars`
/// module, none of which are defined here: they must already exist at the
/// expansion site (in `new!` they come from the preceding `derive!` call).
macro_rules! escape_new {
    ($name:ident) => {
        pub struct $name<'a> {
            bytes: &'a [u8],
        }

        impl<'a> $name<'a> {
            /// Wraps `bytes` without copying them.
            #[inline]
            pub fn new(bytes: &[u8]) -> $name<'_> {
                $name { bytes }
            }

            /// Method form of the free function `f_escape`, applied to the wrapped bytes.
            #[inline]
            pub fn f_escape(&self, buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
                f_escape(self.bytes, buf)
            }
        }

        impl<'a> From<&'a str> for $name<'a> {
            #[inline]
            fn from(s: &str) -> $name<'_> {
                $name::new(s.as_bytes())
            }
        }

        /// Wraps the bytes of `s`; the escaping itself happens when the value is formatted.
        #[inline]
        pub fn escape(s: &str) -> $name<'_> {
            $name {
                bytes: s.as_bytes(),
            }
        }

        impl<'a> std::fmt::Display for $name<'a> {
            // `_escape` may or may not be an `unsafe fn` depending on what generated it.
            #[allow(unused_unsafe)]
            fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
                unsafe { _escape(self.bytes, fmt) }
            }
        }

        /// Returns a value whose `Display` output is `chars::escape_char` applied to `c`.
        #[inline]
        pub fn escape_char(c: char) -> impl std::fmt::Display {
            struct CharEscaper {
                ch: char,
            }

            impl std::fmt::Display for CharEscaper {
                fn fmt(&self, out: &mut std::fmt::Formatter) -> std::fmt::Result {
                    chars::escape_char(self.ch, out)
                }
            }

            CharEscaper { ch: c }
        }

        /// Forwards `s` and the uninitialized buffer `buf` to `_f_escape` and
        /// returns its `Option<usize>` unchanged.
        #[inline]
        #[allow(unused_unsafe)]
        pub fn f_escape(s: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            unsafe { _f_escape(s, buf) }
        }

        /// Single-character counterpart of `f_escape`; forwards to `chars::f_escape_char`.
        #[inline]
        #[allow(unused_unsafe)]
        pub fn f_escape_char(c: char, buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            unsafe { chars::f_escape_char(c, buf) }
        }

        /// Escape byte slice to `Buffer`
        ///
        /// # SIGILL
        /// Can produce **SIGILL** if compiled with `sse2` or `avx2` and executed
        /// on a CPU without them, because there is no way to build multiple
        /// static allocations by type and checking at runtime is very expensive.
        /// https://github.com/rust-lang/rust/issues/57775
        #[inline]
        #[allow(unused_unsafe)]
        pub fn b_escape<B: $crate::Buffer>(s: &[u8], buf: &mut B) {
            unsafe { _b_escape(s, buf) }
        }

        /// Escape char to `buf-min::Buffer`
        #[inline]
        #[allow(unused_unsafe)]
        pub fn b_escape_char<B: $crate::Buffer>(s: char, buf: &mut B) {
            unsafe { chars::b_escape_char(s, buf) }
        }
    };
}
259
#[macro_export]
#[doc(hidden)]
/// cfg_if for escape function
///
/// Generates the private `_escape(bytes, fmt)` used by the `Display` impl.
///
/// * `(false, ..)`: `_escape` forwards straight to `scalar::escape`.
/// * `(true, ..)`: on `x86_64`, `_escape` dispatches through a cached function
///   pointer that is resolved on first use by runtime CPU feature detection.
///   The remaining tokens pick the candidate list: `true` tries `avx2`, then
///   `sse2`, then scalar; `false` tries `sse2`, then scalar. On any other
///   architecture the scalar version is generated.
///
/// `scalar` and `ranges` are not defined here; they must be in scope at the
/// expansion site.
macro_rules! cfg_escape {
    (false, $($t:tt)+) => {
        $crate::cfg_escape!(fn);
    };
    (true, $($t:tt)+) => {
        #[cfg(target_arch = "x86_64")]
        #[inline(always)]
        // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29
        fn _escape(bytes: &[u8], fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
            use std::fmt::{self, Formatter};
            use std::mem;
            use std::sync::atomic::{AtomicPtr, Ordering};

            type EscapeFn = fn(&[u8], &mut Formatter) -> fmt::Result;

            // Holds the implementation to call. It starts out as `detect`, which
            // replaces it with the selected implementation on the first call.
            // An `AtomicPtr` in an immutable static avoids taking a reference to
            // a `static mut` and reinterpreting a `fn` slot as an atomic integer.
            static FN: AtomicPtr<()> = AtomicPtr::new(detect as *mut ());

            fn detect(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
                let addr: usize = $crate::cfg_escape!(if $($t)+);
                let fun = addr as *mut ();

                // Racing first calls all store the same value, so `Relaxed` is enough.
                FN.store(fun, Ordering::Relaxed);
                // SAFETY: `fun` is the address of one of the functions named in the
                // `(if ..)` arm. NOTE(review): assumes each of them is callable as
                // `EscapeFn`; they are defined outside this file, so confirm there.
                unsafe { mem::transmute::<*mut (), EscapeFn>(fun)(bytes, fmt) }
            }

            let fun = FN.load(Ordering::Relaxed);
            // SAFETY: `FN` only ever holds `detect` or a value stored by `detect`,
            // see the note there.
            unsafe { mem::transmute::<*mut (), EscapeFn>(fun)(bytes, fmt) }
        }

        #[cfg(not(target_arch = "x86_64"))]
        $crate::cfg_escape!(fn);
    };
    // Scalar-only `_escape`
    (fn) => {
        #[inline(always)]
        fn _escape(bytes: &[u8], fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
            scalar::escape(bytes, fmt)
        }
    };
    // Address of the best available implementation, avx2 allowed
    (if true) => {
        if is_x86_feature_detected!("avx2") {
            ranges::avx::escape as usize
        } else if is_x86_feature_detected!("sse2") {
            ranges::sse::escape as usize
        } else {
            scalar::escape as usize
        }
    };
    // Address of the best available implementation, avx2 not considered
    (if false) => {
        if is_x86_feature_detected!("sse2") {
            ranges::sse::escape as usize
        } else {
            scalar::escape as usize
        }
    };
}
322
#[macro_export]
#[doc(hidden)]
/// cfg_if for the `f_escape` function
///
/// Generates `_f_escape(bytes, buf) -> Option<usize>`, which works on a slice
/// of `MaybeUninit<u8>`.
///
/// * `(false, ..)`: `_f_escape` forwards straight to `scalar::f_escape`.
/// * `(true, ..)`: on `x86_64`, `_f_escape` dispatches through a cached
///   function pointer that is resolved on first use by runtime CPU feature
///   detection. The remaining tokens pick the candidate list: `true` tries
///   `avx2`, then `sse2`, then scalar; `false` tries `sse2`, then scalar. On
///   any other architecture the scalar version is generated.
///
/// `scalar` and `ranges` are not defined here; they must be in scope at the
/// expansion site.
macro_rules! cfg_escape_ptr {
    (false, $($t:tt)+) => {
        $crate::cfg_escape_ptr!(fn);
    };
    (true, $($t:tt)+) => {
        #[cfg(target_arch = "x86_64")]
        #[inline(always)]
        #[allow(unreachable_code)]
        // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29
        pub unsafe fn _f_escape(bytes: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            use std::mem;
            use std::sync::atomic::{AtomicPtr, Ordering};

            type FEscapeFn = fn(&[u8], &mut [mem::MaybeUninit<u8>]) -> Option<usize>;

            // Holds the implementation to call. It starts out as `detect`, which
            // replaces it with the selected implementation on the first call.
            // An `AtomicPtr` in an immutable static avoids taking a reference to
            // a `static mut` and reinterpreting a `fn` slot as an atomic integer.
            static FN: AtomicPtr<()> = AtomicPtr::new(detect as *mut ());

            fn detect(bytes: &[u8], buf: &mut [mem::MaybeUninit<u8>]) -> Option<usize> {
                let addr: usize = $crate::cfg_escape_ptr!(if $($t)+);
                let fun = addr as *mut ();

                // Racing first calls all store the same value, so `Relaxed` is enough.
                FN.store(fun, Ordering::Relaxed);
                // SAFETY: `fun` is the address of one of the functions named in the
                // `(if ..)` arm. NOTE(review): assumes each of them is callable as
                // `FEscapeFn`; they are defined outside this file, so confirm there.
                unsafe { mem::transmute::<*mut (), FEscapeFn>(fun)(bytes, buf) }
            }

            let fun = FN.load(Ordering::Relaxed);
            // SAFETY: `FN` only ever holds `detect` or a value stored by `detect`,
            // see the note there.
            unsafe { mem::transmute::<*mut (), FEscapeFn>(fun)(bytes, buf) }
        }

        #[cfg(not(target_arch = "x86_64"))]
        $crate::cfg_escape_ptr!(fn);
    };
    // Scalar-only `_f_escape`
    (fn) => {
        #[inline(always)]
        pub unsafe fn _f_escape(bytes: &[u8], buf: &mut [std::mem::MaybeUninit<u8>]) -> Option<usize> {
            scalar::f_escape(bytes, buf)
        }
    };
    // Address of the best available implementation, avx2 allowed
    (if true) => {
        if is_x86_feature_detected!("avx2") {
            ranges::avx::f_escape as usize
        } else if is_x86_feature_detected!("sse2") {
            ranges::sse::f_escape as usize
        } else {
            scalar::f_escape as usize
        }
    };
    // Address of the best available implementation, avx2 not considered
    (if false) => {
        if is_x86_feature_detected!("sse2") {
            ranges::sse::f_escape as usize
        } else {
            scalar::f_escape as usize
        }
    };
}
385
#[macro_export]
#[doc(hidden)]
/// cfg_if for the `b_escape` function
///
/// Generates `_b_escape(bytes, buf)`, which writes into any `Buffer`.
/// Because the function is generic, the implementation is chosen at compile
/// time through the `v_escape_avx` / `v_escape_sse` cfg flags instead of a
/// runtime-dispatched pointer.
///
/// * `(false, ..)`: always the scalar version.
/// * `(true, ..)`: the cfg-selected version on `x86_64` unless the
///   `b_escape_nosimd` cfg is set; the scalar version otherwise.
///
/// The two cfg gates in the `(true, ..)` arm must stay exact complements of
/// each other so that exactly one `_b_escape` is defined. The first gate used
/// to test only `target_arch`, so `x86_64` with `b_escape_nosimd` produced two
/// definitions.
///
/// `scalar` and `ranges` are not defined here; they must be in scope at the
/// expansion site.
macro_rules! cfg_escape_bytes {
    (false, $($t:tt)+) => {
        $crate::cfg_escape_bytes!(fn);
    };
    (true, $($t:tt)+) => {
        #[cfg(all(target_arch = "x86_64", not(b_escape_nosimd)))]
        #[inline(always)]
        pub unsafe fn _b_escape<B: $crate::Buffer>(bytes: &[u8], buf: &mut B) {
            $crate::cfg_escape_bytes!(if $($t)+, bytes, buf)
        }

        #[cfg(not(all(target_arch = "x86_64", not(b_escape_nosimd))))]
        $crate::cfg_escape_bytes!(fn);
    };
    // Scalar-only `_b_escape`
    (fn) => {
        #[inline(always)]
        pub unsafe fn _b_escape<B: $crate::Buffer>(bytes: &[u8], buf: &mut B) {
            scalar::b_escape(bytes, buf)
        }
    };
    // avx allowed: `v_escape_avx` wins, then `v_escape_sse`, then scalar
    (if true, $bytes:ident, $buf:ident) => {{
        #[cfg(not(v_escape_avx))] {
            #[cfg(not(v_escape_sse))] {
                scalar::b_escape($bytes, $buf)
            }
            #[cfg(v_escape_sse)] {
                ranges::sse::b_escape($bytes, $buf)
            }
        }
        #[cfg(v_escape_avx)] {
            ranges::avx::b_escape($bytes, $buf)
        }
    }};
    // avx not considered: `v_escape_sse`, then scalar
    (if false, $bytes:ident, $buf:ident) => {{
        #[cfg(not(v_escape_sse))] {
            scalar::b_escape($bytes, $buf)
        }
        #[cfg(v_escape_sse)] {
            ranges::sse::b_escape($bytes, $buf)
        }
    }};
}
431
// Only compiled under `cfg(doctest)`: the README text becomes the doc comment
// of an empty item, so its code fences are picked up as doctests.
#[cfg(doctest)]
mod test_readme {
    // Attaches `$text` as documentation to an empty `extern "C"` block.
    macro_rules! doc_as_doctest {
        ($text:expr) => {
            #[doc = $text]
            extern "C" {}
        };
    }

    doc_as_doctest!(include_str!("../../README.md"));
}