safe_regex_compiler/lib.rs
1//! [](https://crates.io/crates/safe-regex-compiler)
2//! [](http://www.apache.org/licenses/LICENSE-2.0)
3//! [](https://github.com/rust-secure-code/safety-dance/)
4//! [](https://gitlab.com/leonhard-llc/safe-regex-rs/-/pipelines)
5//!
6//! A regular expression compiler.
7//!
8//! If you want to use regular expressions in your software, use the
9//! [`safe_regex`](https://crates.io/crates/safe-regex) crate.
10//!
11//! # Cargo Geiger Safety Report
12//! # Changelog
//! See the [`safe_regex`](https://crates.io/crates/safe-regex) crate.
14//!
15//! # Release Process
16//! 1. Edit `Cargo.toml` and bump version number.
17//! 1. Run `../release.sh`
18#![forbid(unsafe_code)]
19use crate::generator::generate;
20use safe_proc_macro2::{TokenStream, TokenTree};
21
22pub mod generator;
23pub mod parser;
24
/// Debug tracing hook used inside this crate.
///
/// Takes one or more tokens (the same arguments you would give `println!`).
/// In its current form the macro expands to nothing, so the arguments are
/// never evaluated. To turn tracing on, enable the `println!` arm below in
/// place of the empty arm.
#[macro_export]
macro_rules! dprintln {
    // ($($tokens:tt)+) => { println!( $($tokens)+ ) };
    ($($tokens:tt)+) => {};
}
30
/// Converts the bytes into an ASCII string.
///
/// Each input byte is escaped with [`core::ascii::escape_default`]:
/// printable ASCII is kept as-is, tab/CR/LF become `\t`/`\r`/`\n`,
/// quotes and backslashes are backslash-escaped, and every other byte
/// becomes `\xNN`. An empty input produces an empty string.
pub fn escape_ascii(input: impl AsRef<[u8]>) -> String {
    input
        .as_ref()
        .iter()
        .copied()
        .flat_map(core::ascii::escape_default)
        // `escape_default` yields only ASCII bytes, so converting each byte
        // straight to a `char` is lossless and cannot fail.
        .map(char::from)
        .collect()
}
42
43/// Implements the `regex!` macro.
44///
45/// # Errors
46/// Returns `Err(String)` with a human-readable description of the problem.
47pub fn impl_regex(stream: TokenStream) -> Result<TokenStream, String> {
48 // Ident { sym: regex }
49 // Punct { char: '!', spacing: Alone }
50 // Group {
51 // delimiter: Parenthesis,
52 // stream: TokenStream [
53 // Literal { lit: br"a" }
54 // ]
55 // }
56 const ERR: &str = "expected a raw byte string, like br\"abc\"";
57 dprintln!(
58 "impl_regex {:?}",
59 stream
60 .clone()
61 .into_iter()
62 .map(|tree| format!("{:?} ", tree))
63 .collect::<String>()
64 );
65 let mut stream_iter = stream.into_iter();
66 let Some(TokenTree::Literal(literal)) = stream_iter.next() else {
67 return Err(ERR.to_string());
68 };
69 if stream_iter.next().is_some() {
70 return Err(ERR.to_string());
71 }
72
73 // The compiler already parsed the literal, but does not expose its fields.
74 // So we convert the literal to a string and parse it ourselves.
75 // https://stackoverflow.com/questions/61169932/how-do-i-get-the-value-and-type-of-a-literal-in-a-rust-proc-macro
76 let literal_string = literal.to_string();
77 // println!("compiling safe_regex::regex!({})", literal_string);
78 let raw_byte_string = literal_string
79 .strip_prefix("br")
80 .ok_or_else(|| ERR.to_string())?
81 // Compiler guarantees that strings are closed.
82 .trim_start_matches('#')
83 .trim_start_matches('"')
84 .trim_end_matches('#')
85 .trim_end_matches('"');
86 // The compiler guarantees that a literal byte string contains only ASCII.
87 // > regex!(br"€"); // error: raw byte string must be ASCII
88 // Therefore, we can slice the string at any byte offset.
89 let final_node = crate::parser::parse(raw_byte_string.as_bytes())?;
90 Ok(generate(&final_node))
91}