magic_embed/
lib.rs

1//! # `magic-embed`: Compile-time Magic Database Embedding
2//!
3//! A procedural macro crate for embedding compiled [`magic_rs`](https://crates.io/crates/magic-rs) databases directly into your Rust binary.
4//! This crate provides a convenient way to bundle file type detection rules with your application,
5//! eliminating the need for external rule files at runtime.
6//!
7//! ## Features
8//!
9//! * **Compile-time Embedding**: Magic rule files are compiled and embedded during build
10//! * **Zero Runtime Dependencies**: No need to distribute separate rule files
11//! * **Flexible Configuration**: Include/exclude specific rule files or directories
//! * **Seamless Integration**: Works with the [`magic_rs`](https://crates.io/crates/magic-rs) crate
13//!
14//! ## Installation
15//!
16//! Add `magic-embed` to your `Cargo.toml`:
17//!
18//! ```toml
19//! [dependencies]
20//! magic-embed = "0.1"  # Replace with the latest version
21//! magic-rs = "0.1"     # Required peer dependency
22//! ```
23//!
24//! ## Usage
25//!
26//! Apply the `#[magic_embed]` attribute to a struct to embed a compiled magic database:
27//!
28//! ```rust
29//! use magic_embed::magic_embed;
30//! use magic_rs::MagicDb;
31//!
32//! #[magic_embed(include=["magic-db/src/magdir"], exclude=["magic-db/src/magdir/der"])]
33//! struct MyMagicDb;
34//!
35//! fn main() -> Result<(), magic_rs::Error> {
36//!     let db = MyMagicDb::open()?;
37//!     // Use the database as you would with magic_rs
38//!     Ok(())
39//! }
40//! ```
41//!
42//! ## Attributes
43//!
44//! | Attribute | Type       | Required | Description |
45//! |-----------|------------|----------|-------------|
46//! | `include` | String[]   | Yes      | Paths to include in the database (files or directories) |
47//! | `exclude` | String[]   | No       | Paths to exclude from the database |
48//!
49//! ## Complete Example
50//!
51//! ```rust
52//! use magic_embed::magic_embed;
53//! use magic_rs::MagicDb;
54//! use std::fs::File;
55//! use std::env::current_exe;
56//!
57//! #[magic_embed(
58//!     include=["magic-db/src/magdir"],
59//!     exclude=["magic-db/src/magdir/der"]
60//! )]
61//! struct AppMagicDb;
62//!
63//! fn main() -> Result<(), Box<dyn std::error::Error>> {
64//!     // Open the embedded database
65//!     let db = AppMagicDb::open()?;
66//!
67//!     // Use it to detect file types
68//!     let mut file = File::open(current_exe()?)?;
69//!     let magic = db.magic_first(&mut file, None)?;
70//!
71//!     println!("Detected: {} (MIME: {})", magic.message(), magic.mime_type());
72//!     Ok(())
73//! }
74//! ```
75//!
76//! ## Build Configuration
77//!
78//! To ensure your database is rebuilt when rule files change, create a `build.rs` file:
79//!
80//! ```rust,ignore
81//! // build.rs
82//! fn main() {
83//!     println!("cargo:rerun-if-changed=magic/rules/");
84//! }
85//! ```
86//!
87//! Replace `magic/rules/` with the path to your actual rule files.
88//!
89//! ## How It Works
90//!
91//! 1. **Compile Time**: The macro compiles all specified magic rule files into a binary database
92//! 2. **Embedding**: The compiled database is embedded in your binary as a byte array
93//! 3. **Runtime**: The `open()` method deserializes the embedded database
94//!
//! The compiled database is stored in `target/magic-db/db.bin` and will be automatically
//! rebuilt when any included rule file is modified (provided you have added a `build.rs` script).
97//!
98//! ## Performance Considerations
99//!
100//! - The database is compiled only when source files change
101//! - Embedded databases increase binary size but eliminate runtime file I/O
102//! - Database deserialization happens once at runtime when `open()` is called
103//!
104//! ## License
105//!
106//! This project is licensed under the **GPL-3.0 License**.
107
108use std::{
109    collections::{HashMap, HashSet},
110    fs,
111    path::PathBuf,
112};
113
114use magic_rs::{MagicDb, MagicSource};
115use proc_macro::TokenStream;
116use quote::quote;
117use syn::{
118    Expr, ExprArray, ItemStruct, Meta, MetaNameValue, Token, parse::Parser, punctuated::Punctuated,
119};
120
121/// Parser for procedural macro attributes
122///
123/// Processes comma-separated key-value attributes for the `magic_embed` macro.
124struct MetaParser {
125    attr: proc_macro2::TokenStream,
126    metas: HashMap<String, Meta>,
127}
128
129impl MetaParser {
130    /// Creates a new [`MetaParser`] from a token stream
131    ///
132    /// # Arguments
133    ///
134    /// * `attr` - [`proc_macro2::TokenStream`] - Attribute token stream to parse
135    ///
136    /// # Returns
137    ///
138    /// * `Result<Self, syn::Error>` - Parsed metadata or syntax error
139    fn parse_meta(attr: proc_macro2::TokenStream) -> Result<Self, syn::Error> {
140        let mut out = HashMap::new();
141
142        // parser for a comma-separated list of Meta entries
143        let parser = Punctuated::<Meta, Token![,]>::parse_terminated;
144
145        let metas = match parser.parse2(attr.clone()) {
146            Ok(m) => m,
147            Err(e) => return Err(syn::Error::new_spanned(attr, e.to_string())),
148        };
149
150        for meta in metas {
151            out.insert(
152                meta.path()
153                    .get_ident()
154                    .ok_or(syn::Error::new_spanned(
155                        meta.clone(),
156                        "failed to process meta",
157                    ))?
158                    .to_string(),
159                meta,
160            );
161        }
162        Ok(Self {
163            attr: attr.clone(),
164            metas: out,
165        })
166    }
167
168    /// Retrieves a key-value attribute by name
169    ///
170    /// # Arguments
171    ///
172    /// * `key` - `&str` - Name of the attribute to retrieve
173    ///
174    /// # Returns
175    ///
176    /// * `Result<Option<&MetaNameValue>, syn::Error>` - Found attribute or error
177    fn get_key_value(&self, key: &str) -> Result<Option<&MetaNameValue>, syn::Error> {
178        if let Some(meta) = self.metas.get(key) {
179            match meta {
180                Meta::NameValue(m) => return Ok(Some(m)),
181                _ => {
182                    return Err(syn::Error::new_spanned(
183                        &self.attr,
184                        format!("expecting a key value attribute: {key}"),
185                    ));
186                }
187            }
188        }
189        Ok(None)
190    }
191}
192
193/// Converts a [`MetaNameValue`] array expression to a vector of strings
194///
195/// # Arguments
196///
197/// * `nv` - Name-value attribute containing array
198///
199/// # Returns
200///
201/// * `Result<Vec<(proc_macro2::Span, String)>, syn::Error>` - Vector of (span, string) tuples
202fn meta_name_value_to_string_vec(
203    nv: &MetaNameValue,
204) -> Result<Vec<(proc_macro2::Span, String)>, syn::Error> {
205    if let Expr::Array(ExprArray { elems, .. }) = &nv.value {
206        Ok(elems
207            .into_iter()
208            .filter_map(|e| match e {
209                Expr::Lit(syn::ExprLit {
210                    lit: syn::Lit::Str(lit_str),
211                    ..
212                }) => Some((lit_str.span(), lit_str.value())),
213                _ => None,
214            })
215            .collect::<Vec<_>>())
216    } else {
217        Err(syn::Error::new_spanned(
218            &nv.value,
219            "expected an array literal like [\"foo\", \"bar\"]",
220        ))
221    }
222}
223
224fn impl_magic_embed(attr: TokenStream, item: TokenStream) -> Result<TokenStream, syn::Error> {
225    // Parse the input function
226    let input_struct: ItemStruct = syn::parse2(item.into())?;
227    let struct_name = &input_struct.ident;
228
229    // convert to proc-macro2 TokenStream for syn helpers
230    let ts2: proc_macro2::TokenStream = attr.into();
231
232    let struct_vis = input_struct.vis;
233
234    let metas = MetaParser::parse_meta(ts2)?;
235
236    let exclude = if let Some(exclude) = metas.get_key_value("exclude")? {
237        meta_name_value_to_string_vec(exclude)?
238            .into_iter()
239            .map(|(s, p)| (s, PathBuf::from(p)))
240            .collect()
241    } else {
242        vec![]
243    };
244
245    let include_nv = metas.get_key_value("include")?.ok_or(syn::Error::new(
246        struct_name.span(),
247        "expected  a list of files or directory to include: \"include\" = [\"magdir\"]",
248    ))?;
249
250    let include: Vec<(proc_macro2::Span, PathBuf)> = meta_name_value_to_string_vec(include_nv)?
251        .into_iter()
252        .map(|(s, p)| (s, PathBuf::from(p)))
253        .collect();
254
255    let database_dir = {
256        let p = PathBuf::from("target").join("magic-db");
257        fs::create_dir_all(&p).map_err(|e| {
258            syn::Error::new(
259                struct_name.span(),
260                format!("failed to create directory: {e}"),
261            )
262        })?;
263
264        p.canonicalize().map_err(|e| {
265            syn::Error::new(
266                struct_name.span(),
267                format!("failed to canonicalize path: {e}"),
268            )
269        })?
270    };
271
272    let database_path = database_dir.join("db.bin");
273
274    let database_path_mod = if database_path.exists() {
275        Some(
276            database_path
277                .metadata()
278                .and_then(|m| m.modified())
279                .map_err(|e| {
280                    syn::Error::new(
281                        struct_name.span(),
282                        format!("failed to get database file metadata: {e}"),
283                    )
284                })?,
285        )
286    } else {
287        None
288    };
289
290    // we don't walk rules recursively
291    let mut wo = fs_walk::WalkOptions::new();
292    wo.files().max_depth(0).sort(true);
293
294    let mut db = MagicDb::new();
295
296    for (s, p) in include.iter().chain(exclude.iter()) {
297        if !p.exists() {
298            return Err(syn::Error::new(
299                *s,
300                format!("no such file or directory: {}", p.to_string_lossy()),
301            ));
302        }
303    }
304
305    let mut must_compile_db = false;
306    for (_, p) in include.iter() {
307        if p.is_dir() {
308            for f in wo.walk(p).flatten() {
309                let metadata = f.metadata().and_then(|m| m.modified()).map_err(|e| {
310                    syn::Error::new(
311                        struct_name.span(),
312                        format!("failed to get database file metadata: {e}"),
313                    )
314                })?;
315
316                if Some(metadata) > database_path_mod {
317                    must_compile_db = true;
318                    break;
319                }
320            }
321        } else if p.is_file() {
322        }
323    }
324
325    if must_compile_db {
326        let exclude_set: HashSet<PathBuf> = exclude.into_iter().map(|(_, p)| p).collect();
327
328        macro_rules! load_file {
329            ($span: expr, $path: expr) => {
330                let f = MagicSource::open($path).map_err(|e| {
331                    syn::Error::new(
332                        $span.clone(),
333                        format!(
334                            "failed to parse magic file={}: {e}",
335                            $path.to_string_lossy()
336                        ),
337                    )
338                })?;
339                db.load(f).map_err(|e| {
340                    syn::Error::new(
341                        $span.clone(),
342                        format!("database failed to load magic file: {e}"),
343                    )
344                })?;
345            };
346        }
347
348        for (s, p) in include.iter() {
349            if p.is_dir() {
350                for rule_file in wo.walk(p) {
351                    let rule_file = rule_file.map_err(|e| {
352                        syn::Error::new(*s, format!("failed to list rule file: {e}"))
353                    })?;
354
355                    if exclude_set.contains(&rule_file) {
356                        continue;
357                    }
358
359                    load_file!(s, &rule_file);
360                }
361            } else if p.is_file() {
362                load_file!(s, p);
363            }
364        }
365
366        // Serialize and save database
367        let mut ser = vec![];
368        db.serialize(&mut ser).map_err(|e| {
369            syn::Error::new(
370                struct_name.span(),
371                format!("failed to serialize database: {e}"),
372            )
373        })?;
374
375        fs::write(&database_path, ser).map_err(|e| {
376            syn::Error::new(
377                struct_name.span(),
378                format!("failed to save database file: {e}"),
379            )
380        })?;
381    }
382
383    let str_db_path = database_path.to_string_lossy().to_string();
384
385    // Generate the output: the original function + a print statement
386    let output = quote! {
387        /// This structure exposes an embedded compiled magic database.
388        #struct_vis struct #struct_name;
389
390        impl #struct_name {
391            const DB: &[u8] = include_bytes!(#str_db_path);
392
393            /// Opens the embedded magic database and returns a [`magic_rs::MagicDb`]
394            #struct_vis fn open() -> Result<magic_rs::MagicDb, magic_rs::Error> {
395                magic_rs::MagicDb::deserialize(&mut Self::DB.as_ref())
396            }
397        }
398    };
399
400    Ok(output.into())
401}
402
403/// Procedural macro to embed a compiled [`magic_rs::MagicDb`]
404///
405/// This attribute macro compiles magic rule files at program
406/// compile time and embeds them in the binary. The database
407/// will not be automatically rebuilt when rule files change
408/// (c.f. see Note section below).
409///
410/// # Attributes
411///
412/// * `include` - Array of paths to include in the database (required)
413/// * `exclude` - Array of paths to exclude from the database (optional)
414///
415/// # Examples
416///
417/// ```
418/// use magic_embed::magic_embed;
419/// use magic_rs::MagicDb;
420///
421/// #[magic_embed(include=["magic-db/src/magdir"], exclude=["magic-db/src/magdir/der"])]
422/// struct EmbeddedMagicDb;
423///
424/// let db: MagicDb = EmbeddedMagicDb::open().unwrap();
425/// ```
426///
427/// # Errors
428///
429/// This macro will emit a compile-time error if:
430/// - The `include` attribute is missing
431/// - Specified paths don't exist
432/// - Database compilation fails
433/// - File I/O operations fail
434///
435/// # Note
436///
437/// If you want Cargo to track changes to your rule files (e.g., `magdir/`),
438/// you **must** create a build script in your project. The proc-macro cannot
439/// track these files directly because it embeds only the compiled database,
440/// not the rule files themselves. Add a `build.rs` file like this:
441///
442/// ```ignore
443/// // build.rs
444/// fn main() {
445///     println!("cargo::rerun-if-changed=magdir/");
446/// }
447/// ```
448///
449/// Replace `magdir/` with the path to your rule files.
450#[proc_macro_attribute]
451pub fn magic_embed(attr: TokenStream, item: TokenStream) -> TokenStream {
452    match impl_magic_embed(attr, item) {
453        Ok(ts) => ts,
454        Err(e) => e.to_compile_error().into(),
455    }
456}