magic_embed/lib.rs
1//! # `magic-embed`: Compile-time Magic Database Embedding
2//!
3//! A procedural macro crate for embedding compiled [`pure_magic`](https://crates.io/crates/pure-magic) databases directly into your Rust binary.
4//! This crate provides a convenient way to bundle file type detection rules with your application,
5//! eliminating the need for external rule files at runtime.
6//!
7//! ## Features
8//!
9//! * **Compile-time Embedding**: Magic rule files are compiled and embedded during build
10//! * **Zero Runtime Dependencies**: No need to distribute separate rule files
11//! * **Flexible Configuration**: Include/exclude specific rule files or directories
//! * **Seamless Integration**: Works with the [`pure_magic`](https://crates.io/crates/pure-magic) crate
13//!
14//! ## Installation
15//!
16//! Add `magic-embed` to your `Cargo.toml`:
17//!
18//! ```toml
19//! [dependencies]
20//! magic-embed = "0.1" # Replace with the latest version
21//! pure-magic = "0.1" # Required peer dependency
22//! ```
23//!
24//! ## Usage
25//!
26//! Apply the `#[magic_embed]` attribute to a struct to embed a compiled magic database:
27//!
28//! ```rust
29//! use magic_embed::magic_embed;
30//! use pure_magic::MagicDb;
31//!
32//! #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
33//! struct MyMagicDb;
34//!
35//! fn main() -> Result<(), pure_magic::Error> {
36//! let db = MyMagicDb::open()?;
37//! // Use the database as you would with pure_magic
38//! Ok(())
39//! }
40//! ```
41//!
42//! ## Attributes
43//!
44//! | Attribute | Type | Required | Description |
45//! |-----------|------------|----------|-------------|
46//! | `include` | String[] | Yes | Paths to include in the database (files or directories) |
47//! | `exclude` | String[] | No | Paths to exclude from the database |
48//!
49//! ## Complete Example
50//!
51//! ```rust
52//! use magic_embed::magic_embed;
53//! use pure_magic::MagicDb;
54//! use std::fs::File;
55//! use std::env::current_exe;
56//!
57//! #[magic_embed(
58//! include=["../../magic-db/src/magdir"],
59//! exclude=["../../magic-db/src/magdir/der"]
60//! )]
61//! struct AppMagicDb;
62//!
63//! fn main() -> Result<(), Box<dyn std::error::Error>> {
64//! // Open the embedded database
65//! let db = AppMagicDb::open()?;
66//!
67//! // Use it to detect file types
68//! let mut file = File::open(current_exe()?)?;
69//! let magic = db.first_magic(&mut file, None)?;
70//!
71//! println!("Detected: {} (MIME: {})", magic.message(), magic.mime_type());
72//! Ok(())
73//! }
74//! ```
75//!
76//! ## Build Configuration
77//!
78//! To ensure your database is rebuilt when rule files change, create a `build.rs` file:
79//!
80//! ```rust,ignore
81//! // build.rs
82//! fn main() {
83//! println!("cargo:rerun-if-changed=magic/rules/");
84//! }
85//! ```
86//!
87//! Replace `magic/rules/` with the path to your actual rule files.
88//!
89//! ## How It Works
90//!
91//! 1. **Compile Time**: The macro compiles all specified magic rule files into a binary database
92//! 2. **Embedding**: The compiled database is embedded in your binary as a byte array
93//! 3. **Runtime**: The `open()` method deserializes the embedded database
94//!
//! The compiled database is stored in `target/magic-db/db.bin` and will be automatically
//! rebuilt when any included rule file is modified (provided you have added a `build.rs` script
//! as described above).
97//!
98//! ## Performance Considerations
99//!
100//! - The database is compiled only when source files change
101//! - Embedded databases increase binary size but eliminate runtime file I/O
102//! - Database deserialization happens once at runtime when `open()` is called
103//!
104//! ## License
105//!
106//! This project is licensed under the **GPL-3.0 License**.
107
108use std::{
109 collections::{HashMap, HashSet},
110 fs,
111 path::PathBuf,
112};
113
114use proc_macro::TokenStream;
115use pure_magic::{MagicDb, MagicSource};
116use quote::quote;
117use syn::{
118 Expr, ExprArray, ItemStruct, Meta, MetaNameValue, Token, parse::Parser, punctuated::Punctuated,
119};
120
121/// Parser for procedural macro attributes
122///
123/// Processes comma-separated key-value attributes for the `magic_embed` macro.
124struct MetaParser {
125 attr: proc_macro2::TokenStream,
126 metas: HashMap<String, Meta>,
127}
128
129impl MetaParser {
130 /// Creates a new [`MetaParser`] from a token stream
131 ///
132 /// # Arguments
133 ///
134 /// * `attr` - [`proc_macro2::TokenStream`] - Attribute token stream to parse
135 ///
136 /// # Returns
137 ///
138 /// * `Result<Self, syn::Error>` - Parsed metadata or syntax error
139 fn parse_meta(attr: proc_macro2::TokenStream) -> Result<Self, syn::Error> {
140 let mut out = HashMap::new();
141
142 // parser for a comma-separated list of Meta entries
143 let parser = Punctuated::<Meta, Token![,]>::parse_terminated;
144
145 let metas = match parser.parse2(attr.clone()) {
146 Ok(m) => m,
147 Err(e) => return Err(syn::Error::new_spanned(attr, e.to_string())),
148 };
149
150 for meta in metas {
151 out.insert(
152 meta.path()
153 .get_ident()
154 .ok_or(syn::Error::new_spanned(
155 meta.clone(),
156 "failed to process meta",
157 ))?
158 .to_string(),
159 meta,
160 );
161 }
162 Ok(Self {
163 attr: attr.clone(),
164 metas: out,
165 })
166 }
167
168 /// Retrieves a key-value attribute by name
169 ///
170 /// # Arguments
171 ///
172 /// * `key` - `&str` - Name of the attribute to retrieve
173 ///
174 /// # Returns
175 ///
176 /// * `Result<Option<&MetaNameValue>, syn::Error>` - Found attribute or error
177 fn get_key_value(&self, key: &str) -> Result<Option<&MetaNameValue>, syn::Error> {
178 if let Some(meta) = self.metas.get(key) {
179 match meta {
180 Meta::NameValue(m) => return Ok(Some(m)),
181 _ => {
182 return Err(syn::Error::new_spanned(
183 &self.attr,
184 format!("expecting a key value attribute: {key}"),
185 ));
186 }
187 }
188 }
189 Ok(None)
190 }
191}
192
193/// Converts a [`MetaNameValue`] array expression to a vector of strings
194///
195/// # Arguments
196///
197/// * `nv` - Name-value attribute containing array
198///
199/// # Returns
200///
201/// * `Result<Vec<(proc_macro2::Span, String)>, syn::Error>` - Vector of (span, string) tuples
202fn meta_name_value_to_string_vec(
203 nv: &MetaNameValue,
204) -> Result<Vec<(proc_macro2::Span, String)>, syn::Error> {
205 if let Expr::Array(ExprArray { elems, .. }) = &nv.value {
206 Ok(elems
207 .into_iter()
208 .filter_map(|e| match e {
209 Expr::Lit(syn::ExprLit {
210 lit: syn::Lit::Str(lit_str),
211 ..
212 }) => Some((lit_str.span(), lit_str.value())),
213 _ => None,
214 })
215 .collect::<Vec<_>>())
216 } else {
217 Err(syn::Error::new_spanned(
218 &nv.value,
219 "expected an array literal like [\"foo\", \"bar\"]",
220 ))
221 }
222}
223
224fn impl_magic_embed(attr: TokenStream, item: TokenStream) -> Result<TokenStream, syn::Error> {
225 // Parse the input function
226 let input_struct: ItemStruct = syn::parse2(item.into())?;
227 let struct_name = &input_struct.ident;
228 let cs = proc_macro::Span::call_site();
229
230 let Some(source_file) = cs.local_file() else {
231 return Ok(quote! {}.into());
232 };
233
234 let source_dir = source_file.parent().unwrap();
235
236 // convert to proc-macro2 TokenStream for syn helpers
237 let ts2: proc_macro2::TokenStream = attr.into();
238
239 let struct_vis = input_struct.vis;
240
241 let metas = MetaParser::parse_meta(ts2)?;
242
243 let exclude = if let Some(exclude) = metas.get_key_value("exclude")? {
244 meta_name_value_to_string_vec(exclude)?
245 .into_iter()
246 .map(|(s, p)| (s, source_dir.join(p)))
247 .collect()
248 } else {
249 vec![]
250 };
251
252 let include_nv = metas.get_key_value("include")?.ok_or(syn::Error::new(
253 struct_name.span(),
254 "expected a list of files or directory to include: \"include\" = [\"magdir\"]",
255 ))?;
256
257 let include: Vec<(proc_macro2::Span, PathBuf)> = meta_name_value_to_string_vec(include_nv)?
258 .into_iter()
259 .map(|(s, p)| (s, source_dir.join(p)))
260 .collect();
261
262 let database_dir = {
263 let p = PathBuf::from("target").join("magic-db");
264 fs::create_dir_all(&p).map_err(|e| {
265 syn::Error::new(
266 struct_name.span(),
267 format!("failed to create directory: {e}"),
268 )
269 })?;
270
271 p.canonicalize().map_err(|e| {
272 syn::Error::new(
273 struct_name.span(),
274 format!("failed to canonicalize path: {e}"),
275 )
276 })?
277 };
278
279 let database_path = database_dir.join("db.bin");
280
281 let database_path_mod = if database_path.exists() {
282 Some(
283 database_path
284 .metadata()
285 .and_then(|m| m.modified())
286 .map_err(|e| {
287 syn::Error::new(
288 struct_name.span(),
289 format!("failed to get database file metadata: {e}"),
290 )
291 })?,
292 )
293 } else {
294 None
295 };
296
297 // we don't walk rules recursively
298 let mut wo = fs_walk::WalkOptions::new();
299 wo.files().max_depth(0).sort(true);
300
301 let mut db = MagicDb::new();
302
303 for (s, p) in include.iter().chain(exclude.iter()) {
304 if !p.exists() {
305 return Err(syn::Error::new(
306 *s,
307 format!("no such file or directory: {}", p.to_string_lossy()),
308 ));
309 }
310 }
311
312 let mut must_compile_db = false;
313 for (_, p) in include.iter() {
314 if p.is_dir() {
315 for f in wo.walk(p).flatten() {
316 let metadata = f.metadata().and_then(|m| m.modified()).map_err(|e| {
317 syn::Error::new(
318 struct_name.span(),
319 format!("failed to get database file metadata: {e}"),
320 )
321 })?;
322
323 if Some(metadata) > database_path_mod {
324 must_compile_db = true;
325 break;
326 }
327 }
328 } else if p.is_file() {
329 }
330 }
331
332 if must_compile_db {
333 let exclude_set: HashSet<PathBuf> = exclude.into_iter().map(|(_, p)| p).collect();
334
335 macro_rules! load_file {
336 ($span: expr, $path: expr) => {
337 let f = MagicSource::open($path).map_err(|e| {
338 syn::Error::new(
339 $span.clone(),
340 format!(
341 "failed to parse magic file={}: {e}",
342 $path.to_string_lossy()
343 ),
344 )
345 })?;
346 db.load(f).map_err(|e| {
347 syn::Error::new(
348 $span.clone(),
349 format!("database failed to load magic file: {e}"),
350 )
351 })?;
352 };
353 }
354
355 for (s, p) in include.iter() {
356 if p.is_dir() {
357 for rule_file in wo.walk(p) {
358 let rule_file = rule_file.map_err(|e| {
359 syn::Error::new(*s, format!("failed to list rule file: {e}"))
360 })?;
361
362 if exclude_set.contains(&rule_file) {
363 continue;
364 }
365
366 load_file!(s, &rule_file);
367 }
368 } else if p.is_file() {
369 load_file!(s, p);
370 }
371 }
372
373 // Serialize and save database
374 let mut ser = vec![];
375 db.serialize(&mut ser).map_err(|e| {
376 syn::Error::new(
377 struct_name.span(),
378 format!("failed to serialize database: {e}"),
379 )
380 })?;
381
382 fs::write(&database_path, ser).map_err(|e| {
383 syn::Error::new(
384 struct_name.span(),
385 format!("failed to save database file: {e}"),
386 )
387 })?;
388 }
389
390 let str_db_path = database_path.to_string_lossy().to_string();
391
392 // Generate the output: the original function + a print statement
393 let output = quote! {
394 /// This structure exposes an embedded compiled magic database.
395 #struct_vis struct #struct_name;
396
397 impl #struct_name {
398 const DB: &[u8] = include_bytes!(#str_db_path);
399
400 /// Opens the embedded magic database and returns a [`pure_magic::MagicDb`]
401 #struct_vis fn open() -> Result<pure_magic::MagicDb, pure_magic::Error> {
402 pure_magic::MagicDb::deserialize(&mut Self::DB.as_ref())
403 }
404 }
405 };
406
407 Ok(output.into())
408}
409
410/// Procedural macro to embed a compiled [`pure_magic::MagicDb`]
411///
412/// This attribute macro compiles magic rule files at program
413/// compile time and embeds them in the binary. The database
414/// will not be automatically rebuilt when rule files change
415/// (c.f. see Note section below).
416///
417/// # Attributes
418///
419/// * `include` - Array of paths to include in the database (required)
420/// * `exclude` - Array of paths to exclude from the database (optional)
421///
422/// # Examples
423///
424/// ```
425/// use magic_embed::magic_embed;
426/// use pure_magic::MagicDb;
427///
428/// #[magic_embed(include=["../../magic-db/src/magdir"], exclude=["../../magic-db/src/magdir/der"])]
429/// struct EmbeddedMagicDb;
430///
431/// let db: MagicDb = EmbeddedMagicDb::open().unwrap();
432/// ```
433///
434/// # Errors
435///
436/// This macro will emit a compile-time error if:
437/// - The `include` attribute is missing
438/// - Specified paths don't exist
439/// - Database compilation fails
440/// - File I/O operations fail
441///
442/// # Note
443///
444/// If you want Cargo to track changes to your rule files (e.g., `magdir/`),
445/// you **must** create a build script in your project. The proc-macro cannot
446/// track these files directly because it embeds only the compiled database,
447/// not the rule files themselves. Add a `build.rs` file like this:
448///
449/// ```ignore
450/// // build.rs
451/// fn main() {
452/// println!("cargo::rerun-if-changed=magdir/");
453/// }
454/// ```
455///
456/// Replace `magdir/` with the path to your rule files.
457#[proc_macro_attribute]
458pub fn magic_embed(attr: TokenStream, item: TokenStream) -> TokenStream {
459 match impl_magic_embed(attr, item) {
460 Ok(ts) => ts,
461 Err(e) => e.to_compile_error().into(),
462 }
463}