magic_embed/
lib.rs

1//! # `magic-embed`: Compile-time Magic Database Embedding
2//!
3//! A procedural macro crate for embedding compiled [`pure_magic`](https://crates.io/crates/pure-magic) databases directly into your Rust binary.
4//! This crate provides a convenient way to bundle file type detection rules with your application,
5//! eliminating the need for external rule files at runtime.
6//!
7//! ## Features
8//!
9//! * **Compile-time Embedding**: Magic rule files are compiled and embedded during build
10//! * **Zero Runtime Dependencies**: No need to distribute separate rule files
11//! * **Flexible Configuration**: Include/exclude specific rule files or directories
//! * **Seamless Integration**: Works with the [`pure_magic`](https://crates.io/crates/pure-magic) crate
13//!
14//! ## Installation
15//!
16//! Add `magic-embed` to your `Cargo.toml`:
17//!
18//! ```toml
19//! [dependencies]
20//! magic-embed = "0.1"  # Replace with the latest version
21//! pure-magic = "0.1"     # Required peer dependency
22//! ```
23//!
24//! ## Usage
25//!
26//! Apply the `#[magic_embed]` attribute to a struct to embed a compiled magic database:
27//!
28//! ```rust
29//! use magic_embed::magic_embed;
30//! use pure_magic::MagicDb;
31//!
32//! #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
33//! struct MyMagicDb;
34//!
35//! fn main() -> Result<(), pure_magic::Error> {
36//!     let db = MyMagicDb::open()?;
37//!     // Use the database as you would with pure_magic
38//!     Ok(())
39//! }
40//! ```
41//!
42//! ## Attributes
43//!
44//! | Attribute | Type       | Required | Description |
45//! |-----------|------------|----------|-------------|
46//! | `include` | String[]   | Yes      | Paths to include in the database (files or directories) |
47//! | `exclude` | String[]   | No       | Paths to exclude from the database |
48//!
49//! ## Complete Example
50//!
51//! ```rust
52//! use magic_embed::magic_embed;
53//! use pure_magic::MagicDb;
54//! use std::fs::File;
55//! use std::env::current_exe;
56//!
57//! #[magic_embed(
58//!     include=["../../magic-db/src/magdir"],
59//!     exclude=["../../magic-db/src/magdir/der"]
60//! )]
61//! struct AppMagicDb;
62//!
63//! fn main() -> Result<(), Box<dyn std::error::Error>> {
64//!     // Open the embedded database
65//!     let db = AppMagicDb::open()?;
66//!
67//!     // Use it to detect file types
68//!     let mut file = File::open(current_exe()?)?;
69//!     let magic = db.first_magic(&mut file, None)?;
70//!
71//!     println!("Detected: {} (MIME: {})", magic.message(), magic.mime_type());
72//!     Ok(())
73//! }
74//! ```
75//!
76//! ## Build Configuration
77//!
78//! To ensure your database is rebuilt when rule files change, create a `build.rs` file:
79//!
80//! ```rust,ignore
81//! // build.rs
82//! fn main() {
83//!     println!("cargo:rerun-if-changed=magic/rules/");
84//! }
85//! ```
86//!
87//! Replace `magic/rules/` with the path to your actual rule files.
88//!
89//! ## How It Works
90//!
91//! 1. **Compile Time**: The macro compiles all specified magic rule files into a binary database
92//! 2. **Embedding**: The compiled database is embedded in your binary as a byte array
93//! 3. **Runtime**: The `open()` method deserializes the embedded database
94//!
95//! ## Performance Considerations
96//!
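//! - The database is compiled at build time, whenever the crate invoking the macro is (re)compiled; changes to rule files alone are only picked up with a build script (see Build Configuration)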
98//! - Embedded databases increase binary size but eliminate runtime file I/O
99//! - Database deserialization happens once at runtime when `open()` is called
100//!
101//! ## License
102//!
103//! This project is licensed under the **GPL-3.0 License**.
104
105use std::{
106    collections::{HashMap, HashSet},
107    path::PathBuf,
108};
109
110use proc_macro::TokenStream;
111use pure_magic::{MagicDb, MagicSource};
112use quote::quote;
113use syn::{
114    Expr, ExprArray, ItemStruct, Meta, MetaNameValue, Token, parse::Parser, punctuated::Punctuated,
115};
116
117/// Parser for procedural macro attributes
118///
119/// Processes comma-separated key-value attributes for the `magic_embed` macro.
120struct MetaParser {
121    attr: proc_macro2::TokenStream,
122    metas: HashMap<String, Meta>,
123}
124
125impl MetaParser {
126    /// Creates a new [`MetaParser`] from a token stream
127    ///
128    /// # Arguments
129    ///
130    /// * `attr` - [`proc_macro2::TokenStream`] - Attribute token stream to parse
131    ///
132    /// # Returns
133    ///
134    /// * `Result<Self, syn::Error>` - Parsed metadata or syntax error
135    fn parse_meta(attr: proc_macro2::TokenStream) -> Result<Self, syn::Error> {
136        let mut out = HashMap::new();
137
138        // parser for a comma-separated list of Meta entries
139        let parser = Punctuated::<Meta, Token![,]>::parse_terminated;
140
141        let metas = match parser.parse2(attr.clone()) {
142            Ok(m) => m,
143            Err(e) => return Err(syn::Error::new_spanned(attr, e.to_string())),
144        };
145
146        for meta in metas {
147            out.insert(
148                meta.path()
149                    .get_ident()
150                    .ok_or(syn::Error::new_spanned(
151                        meta.clone(),
152                        "failed to process meta",
153                    ))?
154                    .to_string(),
155                meta,
156            );
157        }
158        Ok(Self {
159            attr: attr.clone(),
160            metas: out,
161        })
162    }
163
164    /// Retrieves a key-value attribute by name
165    ///
166    /// # Arguments
167    ///
168    /// * `key` - `&str` - Name of the attribute to retrieve
169    ///
170    /// # Returns
171    ///
172    /// * `Result<Option<&MetaNameValue>, syn::Error>` - Found attribute or error
173    fn get_key_value(&self, key: &str) -> Result<Option<&MetaNameValue>, syn::Error> {
174        if let Some(meta) = self.metas.get(key) {
175            match meta {
176                Meta::NameValue(m) => return Ok(Some(m)),
177                _ => {
178                    return Err(syn::Error::new_spanned(
179                        &self.attr,
180                        format!("expecting a key value attribute: {key}"),
181                    ));
182                }
183            }
184        }
185        Ok(None)
186    }
187}
188
189/// Converts a [`MetaNameValue`] array expression to a vector of strings
190///
191/// # Arguments
192///
193/// * `nv` - Name-value attribute containing array
194///
195/// # Returns
196///
197/// * `Result<Vec<(proc_macro2::Span, String)>, syn::Error>` - Vector of (span, string) tuples
198fn meta_name_value_to_string_vec(
199    nv: &MetaNameValue,
200) -> Result<Vec<(proc_macro2::Span, String)>, syn::Error> {
201    if let Expr::Array(ExprArray { elems, .. }) = &nv.value {
202        Ok(elems
203            .into_iter()
204            .filter_map(|e| match e {
205                Expr::Lit(syn::ExprLit {
206                    lit: syn::Lit::Str(lit_str),
207                    ..
208                }) => Some((lit_str.span(), lit_str.value())),
209                _ => None,
210            })
211            .collect::<Vec<_>>())
212    } else {
213        Err(syn::Error::new_spanned(
214            &nv.value,
215            "expected an array literal like [\"foo\", \"bar\"]",
216        ))
217    }
218}
219
220fn impl_magic_embed(attr: TokenStream, item: TokenStream) -> Result<TokenStream, syn::Error> {
221    // Parse the input function
222    let input_struct: ItemStruct = syn::parse2(item.into())?;
223    let struct_name = &input_struct.ident;
224    let cs = proc_macro::Span::call_site();
225
226    let Some(source_file) = cs.local_file() else {
227        return Ok(quote! {}.into());
228    };
229
230    let source_dir = source_file.parent().unwrap();
231
232    // convert to proc-macro2 TokenStream for syn helpers
233    let ts2: proc_macro2::TokenStream = attr.into();
234
235    let struct_vis = input_struct.vis;
236
237    let metas = MetaParser::parse_meta(ts2)?;
238
239    let exclude = if let Some(exclude) = metas.get_key_value("exclude")? {
240        meta_name_value_to_string_vec(exclude)?
241            .into_iter()
242            .map(|(s, p)| (s, source_dir.join(p)))
243            .collect()
244    } else {
245        vec![]
246    };
247
248    let include_nv = metas.get_key_value("include")?.ok_or(syn::Error::new(
249        struct_name.span(),
250        "expected  a list of files or directory to include: \"include\" = [\"magdir\"]",
251    ))?;
252
253    let include: Vec<(proc_macro2::Span, PathBuf)> = meta_name_value_to_string_vec(include_nv)?
254        .into_iter()
255        .map(|(s, p)| (s, source_dir.join(p)))
256        .collect();
257
258    // we don't walk rules recursively
259    let mut wo = fs_walk::WalkOptions::new();
260    wo.files().max_depth(0).sort(true);
261
262    let mut db = MagicDb::new();
263
264    let exclude_set: HashSet<PathBuf> = exclude.into_iter().map(|(_, p)| p).collect();
265
266    macro_rules! load_file {
267        ($span: expr, $path: expr) => {
268            let f = MagicSource::open($path).map_err(|e| {
269                syn::Error::new(
270                    $span.clone(),
271                    format!(
272                        "failed to parse magic file={}: {e}",
273                        $path.to_string_lossy()
274                    ),
275                )
276            })?;
277            db.load(f).map_err(|e| {
278                syn::Error::new(
279                    $span.clone(),
280                    format!("database failed to load magic file: {e}"),
281                )
282            })?;
283        };
284    }
285
286    for (s, p) in include.iter() {
287        if p.is_dir() {
288            for rule_file in wo.walk(p) {
289                let rule_file = rule_file
290                    .map_err(|e| syn::Error::new(*s, format!("failed to list rule file: {e}")))?;
291
292                if exclude_set.contains(&rule_file) {
293                    continue;
294                }
295
296                load_file!(s, &rule_file);
297            }
298        } else if p.is_file() {
299            load_file!(s, p);
300        }
301    }
302
303    // Serialize and save database
304    let mut ser = vec![];
305    db.serialize(&mut ser).map_err(|e| {
306        syn::Error::new(
307            struct_name.span(),
308            format!("failed to serialize database: {e}"),
309        )
310    })?;
311
312    // Generate the output: the original function + a print statement
313    let output = quote! {
314        /// This structure exposes an embedded compiled magic database.
315        #struct_vis struct #struct_name;
316
317        impl #struct_name {
318            const DB: &[u8] = &[ #( #ser ),* ];
319
320            /// Opens the embedded magic database and returns a [`pure_magic::MagicDb`]
321            #struct_vis fn open() -> Result<pure_magic::MagicDb, pure_magic::Error> {
322                pure_magic::MagicDb::deserialize(&mut Self::DB.as_ref())
323            }
324        }
325    };
326
327    Ok(output.into())
328}
329
330/// Procedural macro to embed a compiled [`pure_magic::MagicDb`]
331///
332/// This attribute macro compiles magic rule files at program
333/// compile time and embeds them in the binary. The database
334/// will not be automatically rebuilt when rule files change
335/// (c.f. see Note section below).
336///
337/// # Attributes
338///
339/// * `include` - Array of paths to include in the database (required)
340/// * `exclude` - Array of paths to exclude from the database (optional)
341///
342/// # Examples
343///
344/// ```
345/// use magic_embed::magic_embed;
346/// use pure_magic::MagicDb;
347///
348/// #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
349/// struct EmbeddedMagicDb;
350///
351/// let db: MagicDb = EmbeddedMagicDb::open().unwrap();
352/// ```
353///
354/// # Errors
355///
356/// This macro will emit a compile-time error if:
357/// - The `include` attribute is missing
358/// - Specified paths don't exist
359/// - Database compilation fails
360/// - File I/O operations fail
361///
362/// # Note
363///
364/// If you want Cargo to track changes to your rule files (e.g., `magdir/`),
365/// you **must** create a build script in your project. The proc-macro cannot
366/// track these files directly because it embeds only the compiled database,
367/// not the rule files themselves. Add a `build.rs` file like this:
368///
369/// ```ignore
370/// // build.rs
371/// fn main() {
372///     println!("cargo::rerun-if-changed=magdir/");
373/// }
374/// ```
375///
376/// Replace `magdir/` with the path to your rule files.
377#[proc_macro_attribute]
378pub fn magic_embed(attr: TokenStream, item: TokenStream) -> TokenStream {
379    match impl_magic_embed(attr, item) {
380        Ok(ts) => ts,
381        Err(e) => e.to_compile_error().into(),
382    }
383}