styx_embed_macros/
lib.rs

1#![doc = include_str!("../README.md")]
2//! Proc macros for embedding Styx schemas in binaries.
3//!
4//! These macros compress schemas at compile time and embed them
5//! with a magic header so they can be extracted without execution.
6//!
7//! Each schema must have a `meta { id ... }` block. The ID is used to
8//! generate a unique static name, allowing multiple schemas to coexist
9//! in the same binary.
10
11use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree};
12use unsynn::{Comma, DelimitedVec, Parse, TokenIter};
13
/// Magic bytes that identify an embedded Styx schema.
///
/// 16 bytes: "STYX_SCHEMA_V2\0\0". `build_embedded_blob` writes them at the
/// very start of every blob, ahead of the length fields.
const MAGIC: &[u8; 16] = b"STYX_SCHEMA_V2\0\0";
17
18/// Extract the schema ID from a parsed styx document.
19///
20/// Looks for `meta { id <value> }` at the root level.
21fn extract_schema_id(schema: &str) -> Result<String, String> {
22    let value = styx_tree::parse(schema).map_err(|e| format!("failed to parse schema: {e}"))?;
23
24    let obj = value
25        .as_object()
26        .ok_or_else(|| "schema root must be an object".to_string())?;
27
28    let meta = obj
29        .get("meta")
30        .ok_or_else(|| "schema must have a `meta` block".to_string())?;
31
32    let meta_obj = meta
33        .as_object()
34        .ok_or_else(|| "`meta` must be an object".to_string())?;
35
36    let id_value = meta_obj
37        .get("id")
38        .ok_or_else(|| "`meta` block must have an `id` field".to_string())?;
39
40    // ID can be a bare identifier or a quoted string
41    if let Some(s) = id_value.as_str() {
42        return Ok(s.to_string());
43    }
44
45    Err("`meta.id` must be a string or identifier".to_string())
46}
47
/// Turn a schema ID into the human-readable part of a symbol name.
///
/// Every character that is not an ASCII letter or digit becomes `_`. If the
/// result would begin with a digit, a leading `_` is added so the text can be
/// used as the tail of an identifier.
fn sanitize_id(id: &str) -> String {
    let body: String = id
        .chars()
        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
        .collect();

    // `body` is pure ASCII at this point, so inspecting the first byte is
    // the same as inspecting the first character.
    match body.as_bytes().first() {
        Some(first) if first.is_ascii_digit() => format!("_{body}"),
        _ => body,
    }
}
66
67/// Generate a unique symbol suffix from a schema ID.
68///
69/// Format: `{sanitized_id}_{hash8}` where hash8 is 8 hex chars of blake3.
70/// This gives human-readable symbols with guaranteed uniqueness.
71fn id_to_symbol_suffix(id: &str) -> String {
72    let sanitized = sanitize_id(id);
73    let hash = blake3::hash(id.as_bytes());
74    let bytes = hash.as_bytes();
75    format!(
76        "{}_{:02x}{:02x}{:02x}{:02x}",
77        sanitized, bytes[0], bytes[1], bytes[2], bytes[3]
78    )
79}
80
81/// Build the embedded blob for a single schema.
82///
83/// Format (V2 - single schema per blob):
84/// ```text
85/// STYX_SCHEMA_V2\0\0           // 16 bytes magic
86/// <decompressed_len:u32le>
87/// <compressed_len:u32le>
88/// <blake3:32bytes>             // hash of decompressed content
89/// <lz4 compressed schema>
90/// ```
91fn build_embedded_blob(schema: &str) -> Vec<u8> {
92    let decompressed = schema.as_bytes();
93    let hash = blake3::hash(decompressed);
94    let compressed = lz4_flex::compress_prepend_size(decompressed);
95
96    let mut blob = Vec::with_capacity(16 + 4 + 4 + 32 + compressed.len());
97    blob.extend_from_slice(MAGIC);
98    blob.extend_from_slice(&(decompressed.len() as u32).to_le_bytes());
99    blob.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
100    blob.extend_from_slice(hash.as_bytes());
101    blob.extend_from_slice(&compressed);
102    blob
103}
104
105/// Parse a string literal (regular or raw) and return its content.
106fn parse_string_literal(lit: &unsynn::Literal) -> Option<String> {
107    let s = lit.to_string();
108
109    // Raw string: r#"..."# or r"..."
110    if let Some(after_r) = s.strip_prefix("r") {
111        // Find the opening quote pattern (r#, r##, etc.)
112        let hash_count = after_r.chars().take_while(|&c| c == '#').count();
113        let prefix_len = hash_count + 1; // hashes + '"'
114        let suffix_len = 1 + hash_count; // '"' + hashes
115
116        if after_r.len() >= prefix_len + suffix_len {
117            return Some(after_r[prefix_len..after_r.len() - suffix_len].to_string());
118        }
119    }
120
121    // Regular string: "..."
122    if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
123        let inner = &s[1..s.len() - 1];
124        // Handle basic escapes
125        let mut result = String::new();
126        let mut chars = inner.chars().peekable();
127        while let Some(c) = chars.next() {
128            if c == '\\' {
129                match chars.next() {
130                    Some('n') => result.push('\n'),
131                    Some('r') => result.push('\r'),
132                    Some('t') => result.push('\t'),
133                    Some('\\') => result.push('\\'),
134                    Some('"') => result.push('"'),
135                    Some('0') => result.push('\0'),
136                    Some(other) => {
137                        result.push('\\');
138                        result.push(other);
139                    }
140                    None => result.push('\\'),
141                }
142            } else {
143                result.push(c);
144            }
145        }
146        return Some(result);
147    }
148
149    None
150}
151
152/// Generate the static declaration for an embedded schema.
153fn generate_static(schema: &str) -> Result<TokenStream, String> {
154    let id = extract_schema_id(schema)?;
155    let suffix = id_to_symbol_suffix(&id);
156    let blob = build_embedded_blob(schema);
157    let blob_len = blob.len();
158
159    // Generate: [u8; N] = [b0, b1, b2, ...];
160    let mut array_contents = Vec::new();
161    for (i, byte) in blob.iter().enumerate() {
162        array_contents.push(TokenTree::Literal(Literal::u8_unsuffixed(*byte)));
163        if i < blob.len() - 1 {
164            array_contents.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
165        }
166    }
167
168    let output = format!(
169        r#"
170        #[used]
171        #[unsafe(no_mangle)]
172        #[cfg_attr(target_os = "macos", unsafe(link_section = "__DATA,__styx_schemas"))]
173        #[cfg_attr(target_os = "linux", unsafe(link_section = ".styx_schemas"))]
174        #[cfg_attr(target_os = "windows", unsafe(link_section = ".styx"))]
175        static __STYX_SCHEMA_{suffix}: [u8; {blob_len}] = "#
176    );
177
178    let mut result: TokenStream = output.parse().unwrap();
179    let array_group = TokenTree::Group(Group::new(
180        Delimiter::Bracket,
181        array_contents.into_iter().collect(),
182    ));
183    result.extend(std::iter::once(array_group));
184    result.extend(";".parse::<TokenStream>().unwrap());
185
186    Ok(result)
187}
188
189/// Embed a schema from an inline string literal.
190///
191/// The schema must have a `meta { id ... }` block.
192///
193/// # Example
194///
195/// ```rust,ignore
196/// styx_embed::embed_inline!(r#"
197/// meta { id my-schema, version 1.0.0 }
198/// schema { @ @string }
199/// "#);
200/// ```
201#[proc_macro]
202pub fn embed_inline(input: TokenStream) -> TokenStream {
203    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
204
205    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
206        Ok(l) => l,
207        Err(e) => {
208            return format!("compile_error!(\"expected string literal: {e}\")")
209                .parse()
210                .unwrap();
211        }
212    };
213
214    let schema = match parse_string_literal(&literal) {
215        Some(s) => s,
216        None => {
217            return "compile_error!(\"expected string literal\")"
218                .parse()
219                .unwrap();
220        }
221    };
222
223    match generate_static(&schema) {
224        Ok(ts) => ts,
225        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
226            .parse()
227            .unwrap(),
228    }
229}
230
231/// Embed a schema from a file (reads at compile time).
232///
233/// The schema must have a `meta { id ... }` block.
234///
235/// # Example
236///
237/// ```rust,ignore
238/// styx_embed::embed_file!("schema.styx");
239/// ```
240#[proc_macro]
241pub fn embed_file(input: TokenStream) -> TokenStream {
242    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
243
244    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
245        Ok(l) => l,
246        Err(e) => {
247            return format!("compile_error!(\"expected file path string: {e}\")")
248                .parse()
249                .unwrap();
250        }
251    };
252
253    let path = match parse_string_literal(&literal) {
254        Some(s) => s,
255        None => {
256            return "compile_error!(\"expected string literal for file path\")"
257                .parse()
258                .unwrap();
259        }
260    };
261
262    let content = match std::fs::read_to_string(&path) {
263        Ok(c) => c,
264        Err(e) => {
265            return format!("compile_error!(\"failed to read {}: {}\")", path, e)
266                .parse()
267                .unwrap();
268        }
269    };
270
271    match generate_static(&content) {
272        Ok(ts) => ts,
273        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
274            .parse()
275            .unwrap(),
276    }
277}
278
279/// Embed multiple schema files (reads at compile time).
280///
281/// Each schema must have a `meta { id ... }` block. Each generates
282/// its own static with a unique name derived from the ID.
283///
284/// # Example
285///
286/// ```rust,ignore
287/// styx_embed::embed_files!(
288///     "config.styx",
289///     "plugin.styx",
290/// );
291/// ```
292#[proc_macro]
293pub fn embed_files(input: TokenStream) -> TokenStream {
294    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
295
296    let literals: DelimitedVec<unsynn::Literal, Comma> = match Parse::parse(&mut tokens) {
297        Ok(l) => l,
298        Err(e) => {
299            return format!("compile_error!(\"expected file path strings: {e}\")")
300                .parse()
301                .unwrap();
302        }
303    };
304
305    let mut result = TokenStream::new();
306
307    for delimited in literals.iter() {
308        let path = match parse_string_literal(&delimited.value) {
309            Some(s) => s,
310            None => {
311                return "compile_error!(\"expected string literal for file path\")"
312                    .parse()
313                    .unwrap();
314            }
315        };
316
317        let content = match std::fs::read_to_string(&path) {
318            Ok(c) => c,
319            Err(e) => {
320                return format!("compile_error!(\"failed to read {}: {}\")", path, e)
321                    .parse()
322                    .unwrap();
323            }
324        };
325
326        match generate_static(&content) {
327            Ok(ts) => result.extend(ts),
328            Err(e) => {
329                return format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
330                    .parse()
331                    .unwrap();
332            }
333        }
334    }
335
336    if result.is_empty() {
337        return "compile_error!(\"embed_files! requires at least one file\")"
338            .parse()
339            .unwrap();
340    }
341
342    result
343}
344
345/// Embed a schema file from OUT_DIR (for build script output).
346///
347/// The schema must have a `meta { id ... }` block.
348///
349/// # Example
350///
351/// ```rust,ignore
352/// // In build.rs:
353/// // facet_styx::generate_schema::<Config>("schema.styx");
354///
355/// // In src/main.rs:
356/// styx_embed::embed_outdir_file!("schema.styx");
357/// ```
358#[proc_macro]
359pub fn embed_outdir_file(input: TokenStream) -> TokenStream {
360    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
361
362    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
363        Ok(l) => l,
364        Err(e) => {
365            return format!("compile_error!(\"expected filename string: {e}\")")
366                .parse()
367                .unwrap();
368        }
369    };
370
371    let filename = match parse_string_literal(&literal) {
372        Some(s) => s,
373        None => {
374            return "compile_error!(\"expected string literal for filename\")"
375                .parse()
376                .unwrap();
377        }
378    };
379
380    let out_dir = match std::env::var("OUT_DIR") {
381        Ok(dir) => dir,
382        Err(_) => {
383            return "compile_error!(\"OUT_DIR not set - this macro must be used in a crate with a build script\")"
384                .parse()
385                .unwrap()
386        }
387    };
388
389    let path = std::path::Path::new(&out_dir).join(&filename);
390    let path_str = path.display().to_string();
391
392    let content = match std::fs::read_to_string(&path) {
393        Ok(c) => c,
394        Err(e) => {
395            return format!("compile_error!(\"failed to read {}: {}\")", path_str, e)
396                .parse()
397                .unwrap();
398        }
399    };
400
401    match generate_static(&content) {
402        Ok(ts) => ts,
403        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
404            .parse()
405            .unwrap(),
406    }
407}
408
// Keep the old names as aliases for compatibility

/// Legacy alias for `embed_inline!`.
///
/// Forwards its input unchanged to `embed_inline`, so it accepts the same
/// single inline string literal and produces the same output.
#[proc_macro]
pub fn embed_schema(input: TokenStream) -> TokenStream {
    embed_inline(input)
}
414
415#[proc_macro]
416pub fn embed_schemas(input: TokenStream) -> TokenStream {
417    embed_inline(input)
418}