cluCStr/
lib.rs

1//Copyright 2019-2024 #UlinProject Denis Kotlyarov (Денис Котляров)
2
3//Licensed under the Apache License, Version 2.0 (the "License");
4//you may not use this file except in compliance with the License.
5//You may obtain a copy of the License at
6
7//	   http://www.apache.org/licenses/LICENSE-2.0
8
9//Unless required by applicable law or agreed to in writing, software
10//distributed under the License is distributed on an "AS IS" BASIS,
11//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(non_snake_case)]
16#![allow(clippy::redundant_static_lifetimes)]
17#![allow(clippy::tabs_in_doc_comments)]
18#![allow(clippy::needless_doctest_main)]
19
20//#Ulin Project (17 1819) - 2024
21//
22
23/*!
24Safe and efficient creation of "CStr" with zero-byte checking and support for concatenating multiple values.
25
26## Note:
27
You can use `c"wow"` since Rust 1.77.0 instead of `cstr!("wow")` from this crate. This built-in syntax gives more concise code and faster compilation. If you are using an older Rust toolchain (such as 1.66), this crate will still be relevant for some time.
29
30## Example:
31```rust
32use cluCStr::cstr;
33use core::ffi::CStr;
34
35fn main() {
36	let cstr = cstr!(b"How are you?");
37	
38	assert_eq!(cstr.to_bytes_with_nul(), b"How are you?\0");
39}
40```
41*/
42
43#![no_std]
44
45extern crate proc_macro;
46extern crate alloc;
47
48use core::ffi::CStr;
49use alloc::borrow::Cow;
50use alloc::string::{String, ToString};
51use core::num::NonZeroU8;
52use alloc::vec::Vec;
53use quote::quote;
54use proc_macro::TokenStream;
55use proc_macro2::{TokenTree as TokenTree2, Literal, Span};
56
57/// Returns tokens that generate a compilation error with the given message 
58/// in the specified source code range.
59#[inline]
60fn __make_pm_compile_error(span: Span, message: &str) -> TokenStream {
61	TokenStream::from(quote::quote_spanned! {
62		span =>
63		compile_error! { #message }
64	})
65}
66
/// Aborts the enclosing function with a compile error.
///
/// Expands to a `return` of the token stream produced by
/// `__make_pm_compile_error` for the given span and message, so it may only be
/// used inside a function that returns `TokenStream`.
macro_rules! pm_compile_error {
	($at: expr, $msg: expr) => {
		{
			return __make_pm_compile_error($at, $msg);
		}
	};
}
74
/// Evaluates a builder operation and, if it reports a null byte, aborts the
/// enclosing function with a compile error located at the span of `$lit`.
///
/// The second argument must evaluate to `Result<(), ErrDetectedNullByte>`;
/// a trailing comma is accepted.
macro_rules! thiserr_nullbyte {
	(
		$lit: ident, $outcome: expr $(,)?
	) => {{
		// The annotation pins the expression to the builder's result type.
		let outcome: Result<(), ErrDetectedNullByte> = $outcome;

		if let Err(ErrDetectedNullByte) = outcome {
			pm_compile_error!($lit.span(), "Format convention error, null byte detected.");
		}
	}};
}
88
/// Marker error reporting that a zero byte was found in data that was about
/// to be appended to a C string under construction.
///
/// It carries no payload. The common traits are derived so that a
/// `Result<_, ErrDetectedNullByte>` can be debug-printed, compared and
/// unwrapped like any other result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ErrDetectedNullByte;
92
93/// The SafeCStrBuilder struct provides an interface for safely creating C-compatible 
94/// strings (strings that are terminated by a null byte). 
95/// It guarantees that all data is valid (does not contain any null bytes), 
96/// except for the trailing null byte.
97struct SafeCStrBuilder(Vec<u8>);
98
99impl SafeCStrBuilder {
100	/// Creates an empty array without allocations.
101	#[inline(always)]
102	pub const fn empty() -> Self {
103		SafeCStrBuilder(Vec::new())
104	}
105	
106	/// Pushes a byte to the CSTR.
107	///
108	/// # Errors
109	///
110	/// Returns an error if the byte contains a null byte.
111	#[inline(always)]
112	pub fn push(&mut self, a: u8) -> Result<(), ErrDetectedNullByte> {
113		match NonZeroU8::new(a) {
114			Some(a) => {
115				self.push_nonzero(a);
116				
117				Ok(())
118			},
119			None => Err(ErrDetectedNullByte)
120		}
121	}
122	
123	/// Pushes a non-zero byte to the CSTR.
124	#[inline]
125	pub fn push_nonzero(&mut self, a: NonZeroU8) {
126		self.0.push(a.get())
127	}
128	
129	/// Checks if the CSTR is empty.
130	#[inline]
131	pub fn is_empty(&self) -> bool {
132		self.0.is_empty()
133	}
134	
135	/// Returns a fragment of CSTR bytes. 
136	/// 
137	/// (always without trailing null byte)
138	#[inline]
139	#[allow(dead_code)]
140	pub fn as_slice(&self) -> &[u8] {
141		&self.0
142	}
143	
144	/// Extends the CSTR with a slice of bytes.
145	///
146	/// # Errors
147	///
148	/// Returns an error if the slice contains a null byte.
149	pub fn extend_from_slice(&mut self, arr: &[u8]) -> Result<(), ErrDetectedNullByte> {
150		match memchr::memchr(0, arr) {
151			Some(..) => Err(ErrDetectedNullByte),
152			None => {
153				self.0.extend_from_slice(arr);
154				
155				Ok(())
156			}
157		}
158	}
159	
160	/// Converts the CSTR into a COW slice of bytes.
161	/// 
162	/// !Note that if the string is empty, no allocation occurs and a single 
163	/// generic empty CSTR is returned.
164	pub fn into(mut self) -> Cow<'static, [u8]> {
165		match self.is_empty() {
166			true => {
167				/// Generic empty CSTR.
168				static ECSSTR: &'static [u8] = &[0u8];
169				
170				Cow::Borrowed(ECSSTR)
171			},
172			false => {
173				self.0.push(0);
174		
175				self.0.into()
176			}
177		}
178	}
179	
180	/// Validates the SafeDataCSTR.
181	///
182	/// !Calls the valid function if the CSTR is valid (does not contain a null byte).
183	/// !Calls the invalid function if the CSTR is invalid (contains a null byte).
184	#[inline]
185	pub fn validate_with_fns<R>(
186		&self,
187		
188		valid: impl FnOnce() -> R,
189		invalid: impl FnOnce(usize) -> R
190	) -> R {
191		match memchr::memchr(0, &self.0) {
192			Some(a) => invalid(a),
193			None => valid(),
194		}
195	}
196	
197	/// Checks if the CSTR is valid (does not contain a null byte).
198	#[inline]
199	pub fn is_valid(&self) -> bool {
200		self.validate_with_fns(
201			|| true, // valid
202			|_| false, // invalid
203		)
204	}
205}
206
207/// Safe and efficient creation of “CStr” with zero-byte checking and support for concatenating multiple values.
208/// 
209/// ```rust
210/// use cluCStr::cstr;
211/// use core::ffi::CStr;
212/// 
213/// assert_eq!(cstr!("test").to_bytes(), b"test");
214/// assert_eq!(cstr!(b"test", 1).to_bytes(), b"test1");
215/// assert_eq!(cstr!("test1", "test", 2).to_bytes(), b"test1test2");
216/// assert_eq!(cstr!(1u8).to_bytes(), &[1u8] as &[u8]);
217/// assert_eq!(cstr!(1u8, 2u8, 3u8,).to_bytes(), &[1u8, 2, 3] as &[u8]);
218/// assert_eq!(cstr!(1).to_bytes(), b"1");
219/// assert_eq!(cstr!(1, 2, 3, 4, 5,).to_bytes(), b"12345");
220/// ```
221#[proc_macro]
222pub fn cstr(token: TokenStream) -> TokenStream {
223	let token = proc_macro2::TokenStream::from(token);
224	
225	let mut cstrline = SafeCStrBuilder::empty();
226	if !token.is_empty() {
227		let mut iter = token.into_iter();
228		let mut tree;
229		'main: loop {
230			tree = iter.next();
231			
232			'decode: loop {
233				match tree {
234					Some(TokenTree2::Literal(lit)) => { // 'a', "hello", 2.3
235						let data = lit.to_string();
236						let bytes = data.as_bytes();
237						let len = bytes.len();
238						
239						match len {
240							0 => {}, // empty
241							1 => { // 1
242								let a = unsafe {
243									debug_assert!({
244										#[allow(clippy::get_first)]
245										bytes.get(0).is_some()
246									});
247									
248									bytes.get_unchecked(0) // safety: see len == 1 and debug_assert
249								};
250								
251								thiserr_nullbyte!(lit, cstrline.push(*a));
252							},
253							len => { // 2/3/4/...
254								let first = unsafe { // safety: see match len, 0/1 - ignore, 2-3-4 - current
255									debug_assert!({
256										#[allow(clippy::get_first)]
257										bytes.get(0).is_some()
258									});
259									
260									bytes.get_unchecked(0)
261								};
262								let last = unsafe { // safety: see match len, 0/1 - ignore, 2-3-4 - current
263									debug_assert!(bytes.get(len-1).is_some());
264									
265									bytes.get_unchecked(len-1)
266								};
267								
268								match (first, last) {
269									(b'"', b'"') => { // example: "test"
270										let arr = unsafe {
271											debug_assert!(bytes.get(1.. len-1).is_some());
272											
273											bytes.get_unchecked(1.. len-1) // safety: see get and debug_assert
274										};
275										
276										thiserr_nullbyte!(lit, cstrline.extend_from_slice(arr));
277									},
278									(b'b', b'"') if bytes.get(1) == Some(&b'"') => { // example: b"test"
279										let arr = unsafe {
280											debug_assert!(bytes.get(1+1.. len-1).is_some());
281											
282											bytes.get_unchecked(1+1.. len-1) // safety: see get and debug_assert
283										};
284										
285										thiserr_nullbyte!(lit, cstrline.extend_from_slice(arr));
286									},
287									(b'\'', b'\'') /*if len == 3*/ => { // example: '1'
288										let arr = unsafe {
289											debug_assert!(bytes.get(1.. len-1).is_some());
290											
291											bytes.get_unchecked(1.. len-1) // safety: see get and debug_assert
292										};
293										
294										thiserr_nullbyte!(lit, cstrline.extend_from_slice(arr));
295									},
296									(b'b', b'\'') if /*len == 4 &&*/ bytes.get(1) == Some(&b'\'') => { // example: b'1'
297										let arr = unsafe {
298											debug_assert!(bytes.get(1+1.. len-1).is_some());
299											
300											bytes.get_unchecked(1+1.. len-1) // safety: see len == 4
301										};
302										
303										thiserr_nullbyte!(lit, cstrline.extend_from_slice(arr));
304									},
305									(_, _) if bytes.ends_with(b"u8") => { // 10u8
306										let bytes = unsafe {
307											debug_assert!(bytes.get(.. len-b"u8".len()).is_some());
308											
309											bytes.get_unchecked(.. len-b"u8".len()) // safety: see end_with + debug_assert
310										};
311										
312										let num: u8 = match String::from_utf8_lossy(bytes).parse() {
313											Ok(a) => a,
314											Err(..) => {
315												pm_compile_error!(lit.span(), "Input Error");
316											}
317										};
318										thiserr_nullbyte!(lit, cstrline.push(num));
319									},
320									(_, _) if bytes.ends_with(b"i8") => { // 10i8
321										let bytes = unsafe {
322											debug_assert!(bytes.get(.. len-b"i8".len()).is_some());
323											
324											bytes.get_unchecked(.. len-b"i8".len()) // safety: see end_with + debug_assert
325										};
326										
327										let num: i8 = match String::from_utf8_lossy(bytes).parse() {
328											Ok(a) => a,
329											Err(..) => {
330												pm_compile_error!(lit.span(), "Input Error");
331											}
332										};
333										thiserr_nullbyte!(lit, cstrline.push(num as _));
334									},
335									(_, _) => { // len always >1!
336										thiserr_nullbyte!(lit, cstrline.extend_from_slice(bytes));
337									},
338								}
339							}
340						}
341						
342						// Support for empty trailing comma.
343						// example: (test,)
344						//
345						let mut is_en_fatalblock = true;
346						'cparse: loop {
347							tree = iter.next();
348							match tree {
349								None => {
350									break 'main;
351								},
352								Some(TokenTree2::Punct(punct)) if ',' == punct.as_char() => {
353									if !is_en_fatalblock {
354										pm_compile_error!(punct.span(), "Unsupported.")
355									}
356									
357									is_en_fatalblock = false;
358									continue 'cparse;
359								},
360								
361								Some(..) if !is_en_fatalblock => {
362									continue 'decode;
363								},
364								Some(a_tree) => {
365									pm_compile_error!(a_tree.span(), "It was expected ',' or closing of a macro.")
366								},
367							}
368						}
369					},
370					Some(tk) => {
371						pm_compile_error!(tk.span(), "incorrect data, was expected: &[u8], str, u8, i8, {integer}.");
372					},
373					None => {
374						break 'main;
375					},
376				}
377				
378				#[allow(unreachable_code)] {
379					break 'decode;
380				}
381			}
382		}
383	}
384	
385	debug_assert!(cstrline.is_valid()); // debug internal check
386	let cstrline = cstrline.into();
387	let arr = &cstrline as &[u8];
388	debug_assert!( // debug internal check
389		CStr::from_bytes_with_nul(arr).is_ok()
390	);
391	let result = Literal::byte_string(arr);
392	let token = quote! {
393		{
394			const _H: &'static CStr = unsafe {
395				&*(#result /* b"lit_array" */ as *const [u8] as *const CStr) as &'static CStr
396			};
397			
398			_H
399		}
400	};
401	
402	TokenStream::from(token)
403}