1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
//! [![crates.io version](https://img.shields.io/crates/v/safe-regex-compiler.svg)](https://crates.io/crates/safe-regex-compiler)
//! [![license: Apache 2.0](https://gitlab.com/leonhard-llc/safe-regex-rs/-/raw/main/license-apache-2.0.svg)](http://www.apache.org/licenses/LICENSE-2.0)
//! [![unsafe forbidden](https://gitlab.com/leonhard-llc/safe-regex-rs/-/raw/main/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
//! [![pipeline status](https://gitlab.com/leonhard-llc/safe-regex-rs/badges/main/pipeline.svg)](https://gitlab.com/leonhard-llc/safe-regex-rs/-/pipelines)
//!
//! A regular expression compiler.
//!
//! If you want to use regular expressions in your software, use the
//! [`safe_regex`](https://crates.io/crates/safe-regex) crate.
//!
//! # Cargo Geiger Safety Report
//! # Changelog
//! See the [`safe_regex`](https://crates.io/crates/safe-regex) crate.
//!
//! # Release Process
//! 1. Edit `Cargo.toml` and bump version number.
//! 1. Run `../release.sh`
#![forbid(unsafe_code)]
use crate::generator::generate;
use safe_proc_macro2::{TokenStream, TokenTree};
pub mod generator;
pub mod parser;
/// Debug-print macro used for tracing inside the compiler.
///
/// As written, it accepts any non-empty sequence of tokens and expands to
/// nothing, so its arguments are never evaluated.  To turn the output on,
/// enable the commented-out `println!` arm below in place of the empty arm.
#[macro_export]
macro_rules! dprintln {
// Enabled variant: forwards all arguments to `println!`.
// ($($args:tt)+) => { println!( $($args)+ ) };
// Disabled variant: swallows the arguments and emits no code.
($($args:tt)+) => {};
}
/// Escapes arbitrary bytes into a printable ASCII string.
///
/// Every byte is passed through `core::ascii::escape_default`: printable
/// ASCII is kept as-is, `\t`, `\r`, `\n`, `\'`, `\"` and `\\` get a backslash
/// escape, and every other byte is written as `\xNN`.
///
/// This function never panics.
pub fn escape_ascii(input: impl AsRef<[u8]>) -> String {
    input
        .as_ref()
        .iter()
        .flat_map(|byte| core::ascii::escape_default(*byte))
        // `escape_default` only yields ASCII bytes, so the infallible
        // byte-to-char conversion suffices; no UTF-8 validation or `unwrap`.
        .map(char::from)
        .collect()
}
/// Implements the `regex!` macro.
///
/// `stream` must consist of exactly one token: a raw byte string literal such
/// as `br"abc"` or `br#"a"b"#`.  The contents of the literal are passed to
/// `crate::parser::parse` and the resulting node to `generate`.
///
/// # Errors
/// Returns `Err(String)` with a human-readable description of the problem:
/// either the input is not a single raw byte string literal, or
/// `crate::parser::parse` rejected the expression.
pub fn impl_regex(stream: TokenStream) -> Result<TokenStream, String> {
    // Ident { sym: regex }
    // Punct { char: '!', spacing: Alone }
    // Group {
    //   delimiter: Parenthesis,
    //   stream: TokenStream [
    //     Literal { lit: br"a" }
    //   ]
    // }
    const ERR: &str = "expected a raw byte string, like br\"abc\"";
    dprintln!(
        "impl_regex {:?}",
        stream
            .clone()
            .into_iter()
            .map(|tree| format!("{:?} ", tree))
            .collect::<String>()
    );
    let mut stream_iter = stream.into_iter();
    let Some(TokenTree::Literal(literal)) = stream_iter.next() else {
        return Err(ERR.to_string());
    };
    // Anything after the literal is an error.
    if stream_iter.next().is_some() {
        return Err(ERR.to_string());
    }
    // The compiler already parsed the literal, but does not expose its fields.
    // So we convert the literal to a string and parse it ourselves.
    // https://stackoverflow.com/questions/61169932/how-do-i-get-the-value-and-type-of-a-literal-in-a-rust-proc-macro
    let literal_string = literal.to_string();
    // println!("compiling safe_regex::regex!({})", literal_string);
    let raw_byte_string =
        raw_byte_string_contents(&literal_string).ok_or_else(|| ERR.to_string())?;
    // The compiler guarantees that a literal byte string contains only ASCII.
    // > regex!(br"€"); // error: raw byte string must be ASCII
    let final_node = crate::parser::parse(raw_byte_string.as_bytes())?;
    Ok(generate(&final_node))
}

/// Returns the text between the delimiters of a raw byte string literal.
///
/// `literal` is the source form of the token, e.g. `br"abc"` or `br##"a"#b"##`.
/// Exactly one opening quote, one closing quote, and as many closing `#`
/// characters as there are opening ones are removed.  Quotes and hashes that
/// belong to the contents (for example the inner quotes in `br#""a""#`) are
/// preserved.
///
/// Returns `None` when `literal` does not have the shape
/// `br` + N×`#` + `"` + contents + `"` + N×`#`.
fn raw_byte_string_contents(literal: &str) -> Option<&str> {
    let after_prefix = literal.strip_prefix("br")?;
    // All `#` before the opening quote are delimiter hashes: the contents
    // cannot start before the quote.
    let after_hashes = after_prefix.trim_start_matches('#');
    let num_hashes = after_prefix.len() - after_hashes.len();
    let mut contents = after_hashes.strip_prefix('"')?;
    // Remove exactly `num_hashes` closing hashes, so that a `#` or `"` at the
    // end of the contents is not mistaken for part of the delimiter.
    for _ in 0..num_hashes {
        contents = contents.strip_suffix('#')?;
    }
    contents.strip_suffix('"')
}