styx_embed_macros/
lib.rs

1//! Proc macros for embedding Styx schemas in binaries.
2//!
3//! These macros compress schemas at compile time and embed them
4//! with a magic header so they can be extracted without execution.
5//!
6//! Each schema must have a `meta { id ... }` block. The ID is used to
7//! generate a unique static name, allowing multiple schemas to coexist
8//! in the same binary.
9
10use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree};
11use unsynn::{Comma, DelimitedVec, Parse, TokenIter};
12
13/// Magic bytes that identify an embedded Styx schema.
14/// 16 bytes: "STYX_SCHEMA_V2\0\0"
15const MAGIC: &[u8; 16] = b"STYX_SCHEMA_V2\0\0";
16
17/// Extract the schema ID from a parsed styx document.
18///
19/// Looks for `meta { id <value> }` at the root level.
20fn extract_schema_id(schema: &str) -> Result<String, String> {
21    let value = styx_tree::parse(schema).map_err(|e| format!("failed to parse schema: {e}"))?;
22
23    let obj = value
24        .as_object()
25        .ok_or_else(|| "schema root must be an object".to_string())?;
26
27    let meta = obj
28        .get("meta")
29        .ok_or_else(|| "schema must have a `meta` block".to_string())?;
30
31    let meta_obj = meta
32        .as_object()
33        .ok_or_else(|| "`meta` must be an object".to_string())?;
34
35    let id_value = meta_obj
36        .get("id")
37        .ok_or_else(|| "`meta` block must have an `id` field".to_string())?;
38
39    // ID can be a bare identifier or a quoted string
40    if let Some(s) = id_value.as_str() {
41        return Ok(s.to_string());
42    }
43
44    Err("`meta.id` must be a string or identifier".to_string())
45}
46
/// Turn a schema ID into text usable as the readable part of a symbol name.
///
/// Every character that is not an ASCII letter or digit becomes `_` (one
/// underscore per character). If the result would begin with a digit, an
/// extra `_` is put in front so it can follow an identifier prefix safely.
fn sanitize_id(id: &str) -> String {
    let mut symbol: String = id
        .chars()
        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
        .collect();

    // Identifiers may not start with a digit.
    if symbol.starts_with(|first: char| first.is_ascii_digit()) {
        symbol.insert(0, '_');
    }

    symbol
}
65
66/// Generate a unique symbol suffix from a schema ID.
67///
68/// Format: `{sanitized_id}_{hash8}` where hash8 is 8 hex chars of blake3.
69/// This gives human-readable symbols with guaranteed uniqueness.
70fn id_to_symbol_suffix(id: &str) -> String {
71    let sanitized = sanitize_id(id);
72    let hash = blake3::hash(id.as_bytes());
73    let bytes = hash.as_bytes();
74    format!(
75        "{}_{:02x}{:02x}{:02x}{:02x}",
76        sanitized, bytes[0], bytes[1], bytes[2], bytes[3]
77    )
78}
79
80/// Build the embedded blob for a single schema.
81///
82/// Format (V2 - single schema per blob):
83/// ```text
84/// STYX_SCHEMA_V2\0\0           // 16 bytes magic
85/// <decompressed_len:u32le>
86/// <compressed_len:u32le>
87/// <blake3:32bytes>             // hash of decompressed content
88/// <lz4 compressed schema>
89/// ```
90fn build_embedded_blob(schema: &str) -> Vec<u8> {
91    let decompressed = schema.as_bytes();
92    let hash = blake3::hash(decompressed);
93    let compressed = lz4_flex::compress_prepend_size(decompressed);
94
95    let mut blob = Vec::with_capacity(16 + 4 + 4 + 32 + compressed.len());
96    blob.extend_from_slice(MAGIC);
97    blob.extend_from_slice(&(decompressed.len() as u32).to_le_bytes());
98    blob.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
99    blob.extend_from_slice(hash.as_bytes());
100    blob.extend_from_slice(&compressed);
101    blob
102}
103
104/// Parse a string literal (regular or raw) and return its content.
105fn parse_string_literal(lit: &unsynn::Literal) -> Option<String> {
106    let s = lit.to_string();
107
108    // Raw string: r#"..."# or r"..."
109    if let Some(after_r) = s.strip_prefix("r") {
110        // Find the opening quote pattern (r#, r##, etc.)
111        let hash_count = after_r.chars().take_while(|&c| c == '#').count();
112        let prefix_len = hash_count + 1; // hashes + '"'
113        let suffix_len = 1 + hash_count; // '"' + hashes
114
115        if after_r.len() >= prefix_len + suffix_len {
116            return Some(after_r[prefix_len..after_r.len() - suffix_len].to_string());
117        }
118    }
119
120    // Regular string: "..."
121    if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
122        let inner = &s[1..s.len() - 1];
123        // Handle basic escapes
124        let mut result = String::new();
125        let mut chars = inner.chars().peekable();
126        while let Some(c) = chars.next() {
127            if c == '\\' {
128                match chars.next() {
129                    Some('n') => result.push('\n'),
130                    Some('r') => result.push('\r'),
131                    Some('t') => result.push('\t'),
132                    Some('\\') => result.push('\\'),
133                    Some('"') => result.push('"'),
134                    Some('0') => result.push('\0'),
135                    Some(other) => {
136                        result.push('\\');
137                        result.push(other);
138                    }
139                    None => result.push('\\'),
140                }
141            } else {
142                result.push(c);
143            }
144        }
145        return Some(result);
146    }
147
148    None
149}
150
151/// Generate the static declaration for an embedded schema.
152fn generate_static(schema: &str) -> Result<TokenStream, String> {
153    let id = extract_schema_id(schema)?;
154    let suffix = id_to_symbol_suffix(&id);
155    let blob = build_embedded_blob(schema);
156    let blob_len = blob.len();
157
158    // Generate: [u8; N] = [b0, b1, b2, ...];
159    let mut array_contents = Vec::new();
160    for (i, byte) in blob.iter().enumerate() {
161        array_contents.push(TokenTree::Literal(Literal::u8_unsuffixed(*byte)));
162        if i < blob.len() - 1 {
163            array_contents.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
164        }
165    }
166
167    let output = format!(
168        r#"
169        #[used]
170        #[unsafe(no_mangle)]
171        #[cfg_attr(target_os = "macos", unsafe(link_section = "__DATA,__styx_schemas"))]
172        #[cfg_attr(target_os = "linux", unsafe(link_section = ".styx_schemas"))]
173        #[cfg_attr(target_os = "windows", unsafe(link_section = ".styx"))]
174        static __STYX_SCHEMA_{suffix}: [u8; {blob_len}] = "#
175    );
176
177    let mut result: TokenStream = output.parse().unwrap();
178    let array_group = TokenTree::Group(Group::new(
179        Delimiter::Bracket,
180        array_contents.into_iter().collect(),
181    ));
182    result.extend(std::iter::once(array_group));
183    result.extend(";".parse::<TokenStream>().unwrap());
184
185    Ok(result)
186}
187
188/// Embed a schema from an inline string literal.
189///
190/// The schema must have a `meta { id ... }` block.
191///
192/// # Example
193///
194/// ```rust,ignore
195/// styx_embed::embed_inline!(r#"
196/// meta { id my-schema, version 1.0.0 }
197/// schema { @ @string }
198/// "#);
199/// ```
200#[proc_macro]
201pub fn embed_inline(input: TokenStream) -> TokenStream {
202    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
203
204    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
205        Ok(l) => l,
206        Err(e) => {
207            return format!("compile_error!(\"expected string literal: {e}\")")
208                .parse()
209                .unwrap();
210        }
211    };
212
213    let schema = match parse_string_literal(&literal) {
214        Some(s) => s,
215        None => {
216            return "compile_error!(\"expected string literal\")"
217                .parse()
218                .unwrap();
219        }
220    };
221
222    match generate_static(&schema) {
223        Ok(ts) => ts,
224        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
225            .parse()
226            .unwrap(),
227    }
228}
229
230/// Embed a schema from a file (reads at compile time).
231///
232/// The schema must have a `meta { id ... }` block.
233///
234/// # Example
235///
236/// ```rust,ignore
237/// styx_embed::embed_file!("schema.styx");
238/// ```
239#[proc_macro]
240pub fn embed_file(input: TokenStream) -> TokenStream {
241    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
242
243    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
244        Ok(l) => l,
245        Err(e) => {
246            return format!("compile_error!(\"expected file path string: {e}\")")
247                .parse()
248                .unwrap();
249        }
250    };
251
252    let path = match parse_string_literal(&literal) {
253        Some(s) => s,
254        None => {
255            return "compile_error!(\"expected string literal for file path\")"
256                .parse()
257                .unwrap();
258        }
259    };
260
261    let content = match std::fs::read_to_string(&path) {
262        Ok(c) => c,
263        Err(e) => {
264            return format!("compile_error!(\"failed to read {}: {}\")", path, e)
265                .parse()
266                .unwrap();
267        }
268    };
269
270    match generate_static(&content) {
271        Ok(ts) => ts,
272        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
273            .parse()
274            .unwrap(),
275    }
276}
277
278/// Embed multiple schema files (reads at compile time).
279///
280/// Each schema must have a `meta { id ... }` block. Each generates
281/// its own static with a unique name derived from the ID.
282///
283/// # Example
284///
285/// ```rust,ignore
286/// styx_embed::embed_files!(
287///     "config.styx",
288///     "plugin.styx",
289/// );
290/// ```
291#[proc_macro]
292pub fn embed_files(input: TokenStream) -> TokenStream {
293    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
294
295    let literals: DelimitedVec<unsynn::Literal, Comma> = match Parse::parse(&mut tokens) {
296        Ok(l) => l,
297        Err(e) => {
298            return format!("compile_error!(\"expected file path strings: {e}\")")
299                .parse()
300                .unwrap();
301        }
302    };
303
304    let mut result = TokenStream::new();
305
306    for delimited in literals.iter() {
307        let path = match parse_string_literal(&delimited.value) {
308            Some(s) => s,
309            None => {
310                return "compile_error!(\"expected string literal for file path\")"
311                    .parse()
312                    .unwrap();
313            }
314        };
315
316        let content = match std::fs::read_to_string(&path) {
317            Ok(c) => c,
318            Err(e) => {
319                return format!("compile_error!(\"failed to read {}: {}\")", path, e)
320                    .parse()
321                    .unwrap();
322            }
323        };
324
325        match generate_static(&content) {
326            Ok(ts) => result.extend(ts),
327            Err(e) => {
328                return format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
329                    .parse()
330                    .unwrap();
331            }
332        }
333    }
334
335    if result.is_empty() {
336        return "compile_error!(\"embed_files! requires at least one file\")"
337            .parse()
338            .unwrap();
339    }
340
341    result
342}
343
344/// Embed a schema file from OUT_DIR (for build script output).
345///
346/// The schema must have a `meta { id ... }` block.
347///
348/// # Example
349///
350/// ```rust,ignore
351/// // In build.rs:
352/// // facet_styx::generate_schema::<Config>("schema.styx");
353///
354/// // In src/main.rs:
355/// styx_embed::embed_outdir_file!("schema.styx");
356/// ```
357#[proc_macro]
358pub fn embed_outdir_file(input: TokenStream) -> TokenStream {
359    let mut tokens = TokenIter::new(proc_macro2::TokenStream::from(input));
360
361    let literal: unsynn::Literal = match Parse::parse(&mut tokens) {
362        Ok(l) => l,
363        Err(e) => {
364            return format!("compile_error!(\"expected filename string: {e}\")")
365                .parse()
366                .unwrap();
367        }
368    };
369
370    let filename = match parse_string_literal(&literal) {
371        Some(s) => s,
372        None => {
373            return "compile_error!(\"expected string literal for filename\")"
374                .parse()
375                .unwrap();
376        }
377    };
378
379    let out_dir = match std::env::var("OUT_DIR") {
380        Ok(dir) => dir,
381        Err(_) => {
382            return "compile_error!(\"OUT_DIR not set - this macro must be used in a crate with a build script\")"
383                .parse()
384                .unwrap()
385        }
386    };
387
388    let path = std::path::Path::new(&out_dir).join(&filename);
389    let path_str = path.display().to_string();
390
391    let content = match std::fs::read_to_string(&path) {
392        Ok(c) => c,
393        Err(e) => {
394            return format!("compile_error!(\"failed to read {}: {}\")", path_str, e)
395                .parse()
396                .unwrap();
397        }
398    };
399
400    match generate_static(&content) {
401        Ok(ts) => ts,
402        Err(e) => format!("compile_error!(\"{}\")", e.replace('"', "\\\""))
403            .parse()
404            .unwrap(),
405    }
406}
407
408// Keep the old names as aliases for compatibility
409#[proc_macro]
410pub fn embed_schema(input: TokenStream) -> TokenStream {
411    embed_inline(input)
412}
413
414#[proc_macro]
415pub fn embed_schemas(input: TokenStream) -> TokenStream {
416    embed_inline(input)
417}