Skip to main content

magic_embed/
lib.rs

1#![forbid(unsafe_code)]
2#![deny(unused_imports)]
3#![deny(missing_docs)]
4//! # `magic-embed`: Compile-time Magic Database Embedding
5//!
6//! A procedural macro crate for embedding compiled [`pure_magic`](https://crates.io/crates/pure-magic) databases directly into your Rust binary.
7//! This crate provides a convenient way to bundle file type detection rules with your application,
8//! eliminating the need for external rule files at runtime.
9//!
10//! ## Features
11//!
12//! * **Compile-time Embedding**: Magic rule files are compiled and embedded during build
13//! * **Zero Runtime Dependencies**: No need to distribute separate rule files
14//! * **Flexible Configuration**: Include/exclude specific rule files or directories
//! * **Seamless Integration**: Works with the [`pure_magic`](https://crates.io/crates/pure-magic) crate
16//!
17//! ## Installation
18//!
19//! Add `magic-embed` to your `Cargo.toml`:
20//!
21//! ```toml
22//! [dependencies]
23//! magic-embed = "0.1"  # Replace with the latest version
24//! pure-magic = "0.1"     # Required peer dependency
25//! ```
26//!
27//! ## Usage
28//!
29//! Apply the `#[magic_embed]` attribute to a struct to embed a compiled magic database:
30//!
31//! ```rust
32//! use magic_embed::magic_embed;
33//! use pure_magic::MagicDb;
34//!
35//! #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
36//! struct MyMagicDb;
37//!
38//! fn main() -> Result<(), pure_magic::Error> {
39//!     let db = MyMagicDb::open()?;
40//!     // Use the database as you would with pure_magic
41//!     Ok(())
42//! }
43//! ```
44//!
45//! ## Attributes
46//!
47//! | Attribute | Type       | Required | Description |
48//! |-----------|------------|----------|-------------|
49//! | `include` | String[]   | Yes      | Paths to include in the database (files or directories) |
50//! | `exclude` | String[]   | No       | Paths to exclude from the database |
51//!
52//! ## Complete Example
53//!
54//! ```rust
55//! use magic_embed::magic_embed;
56//! use pure_magic::{MagicDb, DataReader};
57//! use std::fs::File;
58//! use std::env::current_exe;
59//!
60//! #[magic_embed(
61//!     include=["../../magic-db/src/magdir"],
62//!     exclude=["../../magic-db/src/magdir/der"]
63//! )]
64//! struct AppMagicDb;
65//!
66//! fn main() -> Result<(), Box<dyn std::error::Error>> {
67//!     // Open the embedded database
68//!     let db = AppMagicDb::open()?;
69//!
70//!     // Use it to detect file types
71//!     let magic = db.first_magic_file(current_exe()?)?;
72//!
73//!     println!("Detected: {} (MIME: {})", magic.message(), magic.mime_type());
74//!     Ok(())
75//! }
76//! ```
77//!
78//! ## Build Configuration
79//!
80//! To ensure your database is rebuilt when rule files change, create a `build.rs` file:
81//!
82//! ```rust,ignore
83//! // build.rs
84//! fn main() {
85//!     println!("cargo:rerun-if-changed=magic/rules/");
86//! }
87//! ```
88//!
89//! Replace `magic/rules/` with the path to your actual rule files.
90//!
91//! ## How It Works
92//!
93//! 1. **Compile Time**: The macro compiles all specified magic rule files into a binary database
94//! 2. **Embedding**: The compiled database is embedded in your binary as a byte array
95//! 3. **Runtime**: The `open()` method deserializes the embedded database
96//!
97//! ## Performance Considerations
98//!
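//! - The database is rebuilt only when the crate invoking the macro is recompiled; changes to rule files are not tracked unless you add the build script described in "Build Configuration"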
100//! - Embedded databases increase binary size but eliminate runtime file I/O
101//! - Database deserialization happens once at runtime when `open()` is called
102//!
103//! ## License
104//!
105//! This project is dual-licensed under either:
106//! - **GPL-3.0**
107//! - **BSD-2-Clause**
108
109use std::{
110    collections::{HashMap, HashSet},
111    path::PathBuf,
112};
113
114use proc_macro::TokenStream;
115use pure_magic::{MagicDb, MagicSource};
116use quote::quote;
117use syn::{
118    Expr, ExprArray, ItemStruct, Meta, MetaNameValue, Token, parse::Parser, punctuated::Punctuated,
119    spanned::Spanned,
120};
121
122/// Parser for procedural macro attributes
123///
124/// Processes comma-separated key-value attributes for the `magic_embed` macro.
125struct MetaParser {
126    attr: proc_macro2::TokenStream,
127    metas: HashMap<String, Meta>,
128}
129
130impl MetaParser {
131    /// Creates a new [`MetaParser`] from a token stream
132    ///
133    /// # Arguments
134    ///
135    /// * `attr` - [`proc_macro2::TokenStream`] - Attribute token stream to parse
136    ///
137    /// # Returns
138    ///
139    /// * `Result<Self, syn::Error>` - Parsed metadata or syntax error
140    fn parse_meta(attr: proc_macro2::TokenStream) -> Result<Self, syn::Error> {
141        let mut out = HashMap::new();
142
143        // parser for a comma-separated list of Meta entries
144        let parser = Punctuated::<Meta, Token![,]>::parse_terminated;
145
146        let metas = match parser.parse2(attr.clone()) {
147            Ok(m) => m,
148            Err(e) => return Err(syn::Error::new_spanned(attr, e.to_string())),
149        };
150
151        for meta in metas {
152            out.insert(
153                meta.path()
154                    .get_ident()
155                    .ok_or(syn::Error::new_spanned(
156                        meta.clone(),
157                        "failed to process meta",
158                    ))?
159                    .to_string(),
160                meta,
161            );
162        }
163        Ok(Self {
164            attr: attr.clone(),
165            metas: out,
166        })
167    }
168
169    /// Retrieves a key-value attribute by name
170    ///
171    /// # Arguments
172    ///
173    /// * `key` - `&str` - Name of the attribute to retrieve
174    ///
175    /// # Returns
176    ///
177    /// * `Result<Option<&MetaNameValue>, syn::Error>` - Found attribute or error
178    fn get_key_value(&self, key: &str) -> Result<Option<&MetaNameValue>, syn::Error> {
179        if let Some(meta) = self.metas.get(key) {
180            match meta {
181                Meta::NameValue(m) => return Ok(Some(m)),
182                _ => {
183                    return Err(syn::Error::new_spanned(
184                        &self.attr,
185                        format!("expecting a key value attribute: {key}"),
186                    ));
187                }
188            }
189        }
190        Ok(None)
191    }
192}
193
194/// Converts a [`MetaNameValue`] array expression to a vector of strings
195///
196/// # Arguments
197///
198/// * `nv` - Name-value attribute containing array
199///
200/// # Returns
201///
202/// * `Result<Vec<(proc_macro2::Span, String)>, syn::Error>` - Vector of (span, string) tuples
203fn meta_name_value_to_string_vec(
204    nv: &MetaNameValue,
205) -> Result<Vec<(proc_macro2::Span, String)>, syn::Error> {
206    if let Expr::Array(ExprArray { elems, .. }) = &nv.value {
207        Ok(elems
208            .into_iter()
209            .filter_map(|e| match e {
210                Expr::Lit(syn::ExprLit {
211                    lit: syn::Lit::Str(lit_str),
212                    ..
213                }) => Some((lit_str.span(), lit_str.value())),
214                _ => None,
215            })
216            .collect::<Vec<_>>())
217    } else {
218        Err(syn::Error::new_spanned(
219            &nv.value,
220            "expected an array literal like [\"foo\", \"bar\"]",
221        ))
222    }
223}
224
225fn impl_magic_embed(attr: TokenStream, item: TokenStream) -> Result<TokenStream, syn::Error> {
226    // Parse the input function
227    let input_struct: ItemStruct = syn::parse2(item.into())?;
228    let struct_name = &input_struct.ident;
229    let cs = proc_macro::Span::call_site();
230
231    let Some(source_file) = cs.local_file() else {
232        return Ok(quote! {}.into());
233    };
234
235    let source_dir = source_file.parent().unwrap();
236
237    // convert to proc-macro2 TokenStream for syn helpers
238    let ts2: proc_macro2::TokenStream = attr.into();
239
240    let struct_vis = input_struct.vis;
241
242    let metas = MetaParser::parse_meta(ts2)?;
243
244    let exclude = if let Some(exclude) = metas.get_key_value("exclude")? {
245        meta_name_value_to_string_vec(exclude)?
246            .into_iter()
247            .map(|(s, p)| (s, source_dir.join(p)))
248            .collect()
249    } else {
250        vec![]
251    };
252
253    let include_nv = metas.get_key_value("include")?.ok_or(syn::Error::new(
254        struct_name.span(),
255        "expected  a list of files or directory to include: \"include\" = [\"magdir\"]",
256    ))?;
257
258    let include: Vec<(proc_macro2::Span, PathBuf)> = meta_name_value_to_string_vec(include_nv)?
259        .into_iter()
260        .map(|(s, p)| (s, source_dir.join(p)))
261        .collect();
262
263    // we don't walk rules recursively
264    let mut wo = fs_walk::WalkOptions::new();
265    wo.files().max_depth(0).sort(true);
266
267    let mut db = MagicDb::new();
268
269    let exclude_set: HashSet<PathBuf> = exclude.into_iter().map(|(_, p)| p).collect();
270
271    macro_rules! load_file {
272        ($span: expr, $path: expr) => {
273            MagicSource::open($path).map_err(|e| {
274                syn::Error::new(
275                    $span.clone(),
276                    format!(
277                        "failed to parse magic file={}: {e}",
278                        $path.to_string_lossy()
279                    ),
280                )
281            })?
282        };
283    }
284
285    let mut rules = vec![];
286    for (s, p) in include.iter() {
287        if p.is_dir() {
288            for rule_file in wo.walk(p) {
289                let rule_file = rule_file
290                    .map_err(|e| syn::Error::new(*s, format!("failed to list rule file: {e}")))?;
291
292                if exclude_set.contains(&rule_file) {
293                    continue;
294                }
295
296                rules.push(load_file!(s, &rule_file));
297            }
298        } else if p.is_file() {
299            rules.push(load_file!(s, p));
300        }
301    }
302
303    db.load_bulk(rules.into_iter());
304    db.verify()
305        .map_err(|e| syn::Error::new(include_nv.span(), format!("inconsistent database: {e}")))?;
306
307    // Serialize and save database
308    let mut ser = vec![];
309    db.serialize(&mut ser).map_err(|e| {
310        syn::Error::new(
311            struct_name.span(),
312            format!("failed to serialize database: {e}"),
313        )
314    })?;
315
316    // Generate the output: the original function + a print statement
317    let output = quote! {
318        /// This structure exposes an embedded compiled magic database.
319        #struct_vis struct #struct_name;
320
321        impl #struct_name {
322            const DB: &[u8] = &[ #( #ser ),* ];
323
324            /// Opens the embedded magic database and returns a [`pure_magic::MagicDb`]
325            #struct_vis fn open() -> Result<pure_magic::MagicDb, pure_magic::Error> {
326                pure_magic::MagicDb::deserialize(&mut Self::DB.as_ref())
327            }
328        }
329    };
330
331    Ok(output.into())
332}
333
334/// Procedural macro to embed a compiled [`pure_magic::MagicDb`]
335///
336/// This attribute macro compiles magic rule files at program
337/// compile time and embeds them in the binary. The database
338/// will not be automatically rebuilt when rule files change
339/// (c.f. see Note section below).
340///
341/// # Attributes
342///
343/// * `include` - Array of paths to include in the database (required)
344/// * `exclude` - Array of paths to exclude from the database (optional)
345///
346/// # Examples
347///
348/// ```
349/// use magic_embed::magic_embed;
350/// use pure_magic::MagicDb;
351///
352/// #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
353/// struct EmbeddedMagicDb;
354///
355/// let db: MagicDb = EmbeddedMagicDb::open().unwrap();
356/// ```
357///
358/// # Errors
359///
360/// This macro will emit a compile-time error if:
361/// - The `include` attribute is missing
362/// - Specified paths don't exist
363/// - Database compilation fails
364/// - File I/O operations fail
365///
366/// # Note
367///
368/// If you want Cargo to track changes to your rule files (e.g., `magdir/`),
369/// you **must** create a build script in your project. The proc-macro cannot
370/// track these files directly because it embeds only the compiled database,
371/// not the rule files themselves. Add a `build.rs` file like this:
372///
373/// ```ignore
374/// // build.rs
375/// fn main() {
376///     println!("cargo::rerun-if-changed=magdir/");
377/// }
378/// ```
379///
380/// Replace `magdir/` with the path to your rule files.
381#[proc_macro_attribute]
382pub fn magic_embed(attr: TokenStream, item: TokenStream) -> TokenStream {
383    match impl_magic_embed(attr, item) {
384        Ok(ts) => ts,
385        Err(e) => e.to_compile_error().into(),
386    }
387}