magic_embed/lib.rs
1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3#![deny(missing_docs)]
4//! # `magic-embed`: Compile-time Magic Database Embedding
5//!
6//! A procedural macro crate for embedding compiled [`pure_magic`](https://crates.io/crates/pure-magic) databases directly into your Rust binary.
7//! This crate provides a convenient way to bundle file type detection rules with your application,
8//! eliminating the need for external rule files at runtime.
9//!
10//! ## Features
11//!
12//! * **Compile-time Embedding**: Magic rule files are compiled and embedded during build
13//! * **Zero Runtime Dependencies**: No need to distribute separate rule files
14//! * **Flexible Configuration**: Include/exclude specific rule files or directories
//! * **Seamless Integration**: Works with the [`pure_magic`](https://crates.io/crates/pure-magic) crate
16//!
17//! ## Installation
18//!
19//! Add `magic-embed` to your `Cargo.toml`:
20//!
21//! ```toml
22//! [dependencies]
23//! magic-embed = "0.1" # Replace with the latest version
24//! pure-magic = "0.1" # Required peer dependency
25//! ```
26//!
27//! ## Usage
28//!
29//! Apply the `#[magic_embed]` attribute to a struct to embed a compiled magic database:
30//!
31//! ```rust
32//! use magic_embed::magic_embed;
33//! use pure_magic::MagicDb;
34//!
35//! #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
36//! struct MyMagicDb;
37//!
38//! fn main() -> Result<(), pure_magic::Error> {
39//! let db = MyMagicDb::open()?;
40//! // Use the database as you would with pure_magic
41//! Ok(())
42//! }
43//! ```
44//!
45//! ## Attributes
46//!
47//! | Attribute | Type | Required | Description |
48//! |-----------|------------|----------|-------------|
49//! | `include` | String[] | Yes | Paths to include in the database (files or directories) |
50//! | `exclude` | String[] | No | Paths to exclude from the database |
51//!
52//! ## Complete Example
53//!
54//! ```rust
55//! use magic_embed::magic_embed;
56//! use pure_magic::MagicDb;
57//! use std::fs::File;
58//! use std::env::current_exe;
59//!
60//! #[magic_embed(
61//! include=["../../magic-db/src/magdir"],
62//! exclude=["../../magic-db/src/magdir/der"]
63//! )]
64//! struct AppMagicDb;
65//!
66//! fn main() -> Result<(), Box<dyn std::error::Error>> {
67//! // Open the embedded database
68//! let db = AppMagicDb::open()?;
69//!
70//! // Use it to detect file types
71//! let mut file = File::open(current_exe()?)?;
72//! let magic = db.first_magic(&mut file, None)?;
73//!
74//! println!("Detected: {} (MIME: {})", magic.message(), magic.mime_type());
75//! Ok(())
76//! }
77//! ```
78//!
79//! ## Build Configuration
80//!
81//! To ensure your database is rebuilt when rule files change, create a `build.rs` file:
82//!
83//! ```rust,ignore
84//! // build.rs
85//! fn main() {
86//! println!("cargo:rerun-if-changed=magic/rules/");
87//! }
88//! ```
89//!
90//! Replace `magic/rules/` with the path to your actual rule files.
91//!
92//! ## How It Works
93//!
94//! 1. **Compile Time**: The macro compiles all specified magic rule files into a binary database
95//! 2. **Embedding**: The compiled database is embedded in your binary as a byte array
96//! 3. **Runtime**: The `open()` method deserializes the embedded database
97//!
98//! ## Performance Considerations
99//!
100//! - The database is compiled only when source files change
101//! - Embedded databases increase binary size but eliminate runtime file I/O
102//! - Database deserialization happens once at runtime when `open()` is called
103//!
104//! ## License
105//!
106//! This project is dual-licensed under either:
107//! - **GPL-3.0**
108//! - **BSD-2-Clause**
109
110use std::{
111 collections::{HashMap, HashSet},
112 path::PathBuf,
113};
114
115use proc_macro::TokenStream;
116use pure_magic::{MagicDb, MagicSource};
117use quote::quote;
118use syn::{
119 Expr, ExprArray, ItemStruct, Meta, MetaNameValue, Token, parse::Parser, punctuated::Punctuated,
120 spanned::Spanned,
121};
122
/// Parser for procedural macro attributes
///
/// Processes comma-separated key-value attributes for the `magic_embed` macro.
struct MetaParser {
    /// Attribute token stream as received by the macro, kept so that
    /// errors can be spanned on the whole attribute list
    attr: proc_macro2::TokenStream,
    /// Parsed attribute entries, keyed by the identifier of each entry's path
    metas: HashMap<String, Meta>,
}
130
131impl MetaParser {
132 /// Creates a new [`MetaParser`] from a token stream
133 ///
134 /// # Arguments
135 ///
136 /// * `attr` - [`proc_macro2::TokenStream`] - Attribute token stream to parse
137 ///
138 /// # Returns
139 ///
140 /// * `Result<Self, syn::Error>` - Parsed metadata or syntax error
141 fn parse_meta(attr: proc_macro2::TokenStream) -> Result<Self, syn::Error> {
142 let mut out = HashMap::new();
143
144 // parser for a comma-separated list of Meta entries
145 let parser = Punctuated::<Meta, Token![,]>::parse_terminated;
146
147 let metas = match parser.parse2(attr.clone()) {
148 Ok(m) => m,
149 Err(e) => return Err(syn::Error::new_spanned(attr, e.to_string())),
150 };
151
152 for meta in metas {
153 out.insert(
154 meta.path()
155 .get_ident()
156 .ok_or(syn::Error::new_spanned(
157 meta.clone(),
158 "failed to process meta",
159 ))?
160 .to_string(),
161 meta,
162 );
163 }
164 Ok(Self {
165 attr: attr.clone(),
166 metas: out,
167 })
168 }
169
170 /// Retrieves a key-value attribute by name
171 ///
172 /// # Arguments
173 ///
174 /// * `key` - `&str` - Name of the attribute to retrieve
175 ///
176 /// # Returns
177 ///
178 /// * `Result<Option<&MetaNameValue>, syn::Error>` - Found attribute or error
179 fn get_key_value(&self, key: &str) -> Result<Option<&MetaNameValue>, syn::Error> {
180 if let Some(meta) = self.metas.get(key) {
181 match meta {
182 Meta::NameValue(m) => return Ok(Some(m)),
183 _ => {
184 return Err(syn::Error::new_spanned(
185 &self.attr,
186 format!("expecting a key value attribute: {key}"),
187 ));
188 }
189 }
190 }
191 Ok(None)
192 }
193}
194
195/// Converts a [`MetaNameValue`] array expression to a vector of strings
196///
197/// # Arguments
198///
199/// * `nv` - Name-value attribute containing array
200///
201/// # Returns
202///
203/// * `Result<Vec<(proc_macro2::Span, String)>, syn::Error>` - Vector of (span, string) tuples
204fn meta_name_value_to_string_vec(
205 nv: &MetaNameValue,
206) -> Result<Vec<(proc_macro2::Span, String)>, syn::Error> {
207 if let Expr::Array(ExprArray { elems, .. }) = &nv.value {
208 Ok(elems
209 .into_iter()
210 .filter_map(|e| match e {
211 Expr::Lit(syn::ExprLit {
212 lit: syn::Lit::Str(lit_str),
213 ..
214 }) => Some((lit_str.span(), lit_str.value())),
215 _ => None,
216 })
217 .collect::<Vec<_>>())
218 } else {
219 Err(syn::Error::new_spanned(
220 &nv.value,
221 "expected an array literal like [\"foo\", \"bar\"]",
222 ))
223 }
224}
225
226fn impl_magic_embed(attr: TokenStream, item: TokenStream) -> Result<TokenStream, syn::Error> {
227 // Parse the input function
228 let input_struct: ItemStruct = syn::parse2(item.into())?;
229 let struct_name = &input_struct.ident;
230 let cs = proc_macro::Span::call_site();
231
232 let Some(source_file) = cs.local_file() else {
233 return Ok(quote! {}.into());
234 };
235
236 let source_dir = source_file.parent().unwrap();
237
238 // convert to proc-macro2 TokenStream for syn helpers
239 let ts2: proc_macro2::TokenStream = attr.into();
240
241 let struct_vis = input_struct.vis;
242
243 let metas = MetaParser::parse_meta(ts2)?;
244
245 let exclude = if let Some(exclude) = metas.get_key_value("exclude")? {
246 meta_name_value_to_string_vec(exclude)?
247 .into_iter()
248 .map(|(s, p)| (s, source_dir.join(p)))
249 .collect()
250 } else {
251 vec![]
252 };
253
254 let include_nv = metas.get_key_value("include")?.ok_or(syn::Error::new(
255 struct_name.span(),
256 "expected a list of files or directory to include: \"include\" = [\"magdir\"]",
257 ))?;
258
259 let include: Vec<(proc_macro2::Span, PathBuf)> = meta_name_value_to_string_vec(include_nv)?
260 .into_iter()
261 .map(|(s, p)| (s, source_dir.join(p)))
262 .collect();
263
264 // we don't walk rules recursively
265 let mut wo = fs_walk::WalkOptions::new();
266 wo.files().max_depth(0).sort(true);
267
268 let mut db = MagicDb::new();
269
270 let exclude_set: HashSet<PathBuf> = exclude.into_iter().map(|(_, p)| p).collect();
271
272 macro_rules! load_file {
273 ($span: expr, $path: expr) => {
274 MagicSource::open($path).map_err(|e| {
275 syn::Error::new(
276 $span.clone(),
277 format!(
278 "failed to parse magic file={}: {e}",
279 $path.to_string_lossy()
280 ),
281 )
282 })?
283 };
284 }
285
286 let mut rules = vec![];
287 for (s, p) in include.iter() {
288 if p.is_dir() {
289 for rule_file in wo.walk(p) {
290 let rule_file = rule_file
291 .map_err(|e| syn::Error::new(*s, format!("failed to list rule file: {e}")))?;
292
293 if exclude_set.contains(&rule_file) {
294 continue;
295 }
296
297 rules.push(load_file!(s, &rule_file));
298 }
299 } else if p.is_file() {
300 rules.push(load_file!(s, p));
301 }
302 }
303
304 db.load_bulk(rules.into_iter());
305 db.verify().map_err(|e| {
306 syn::Error::new(include_nv.span(), format!("inconsistent database: {e}"))
307 })?;
308
309 // Serialize and save database
310 let mut ser = vec![];
311 db.serialize(&mut ser).map_err(|e| {
312 syn::Error::new(
313 struct_name.span(),
314 format!("failed to serialize database: {e}"),
315 )
316 })?;
317
318 // Generate the output: the original function + a print statement
319 let output = quote! {
320 /// This structure exposes an embedded compiled magic database.
321 #struct_vis struct #struct_name;
322
323 impl #struct_name {
324 const DB: &[u8] = &[ #( #ser ),* ];
325
326 /// Opens the embedded magic database and returns a [`pure_magic::MagicDb`]
327 #struct_vis fn open() -> Result<pure_magic::MagicDb, pure_magic::Error> {
328 pure_magic::MagicDb::deserialize(&mut Self::DB.as_ref())
329 }
330 }
331 };
332
333 Ok(output.into())
334}
335
336/// Procedural macro to embed a compiled [`pure_magic::MagicDb`]
337///
338/// This attribute macro compiles magic rule files at program
339/// compile time and embeds them in the binary. The database
340/// will not be automatically rebuilt when rule files change
341/// (c.f. see Note section below).
342///
343/// # Attributes
344///
345/// * `include` - Array of paths to include in the database (required)
346/// * `exclude` - Array of paths to exclude from the database (optional)
347///
348/// # Examples
349///
350/// ```
351/// use magic_embed::magic_embed;
352/// use pure_magic::MagicDb;
353///
354/// #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
355/// struct EmbeddedMagicDb;
356///
357/// let db: MagicDb = EmbeddedMagicDb::open().unwrap();
358/// ```
359///
360/// # Errors
361///
362/// This macro will emit a compile-time error if:
363/// - The `include` attribute is missing
364/// - Specified paths don't exist
365/// - Database compilation fails
366/// - File I/O operations fail
367///
368/// # Note
369///
370/// If you want Cargo to track changes to your rule files (e.g., `magdir/`),
371/// you **must** create a build script in your project. The proc-macro cannot
372/// track these files directly because it embeds only the compiled database,
373/// not the rule files themselves. Add a `build.rs` file like this:
374///
375/// ```ignore
376/// // build.rs
377/// fn main() {
378/// println!("cargo::rerun-if-changed=magdir/");
379/// }
380/// ```
381///
382/// Replace `magdir/` with the path to your rule files.
383#[proc_macro_attribute]
384pub fn magic_embed(attr: TokenStream, item: TokenStream) -> TokenStream {
385 match impl_magic_embed(attr, item) {
386 Ok(ts) => ts,
387 Err(e) => e.to_compile_error().into(),
388 }
389}