1#![doc = include_str!("../README.md")]
2pub use styx_embed_macros::{
80 embed_file, embed_files, embed_inline, embed_outdir_file, embed_schema, embed_schemas,
81};
82
/// 16-byte marker (NUL-padded) that starts a V2 blob.
///
/// `compress_schema` writes this marker first, followed by the decompressed
/// length, the compressed length, a BLAKE3 hash and the LZ4 payload of exactly
/// one schema.
pub const MAGIC_V2: &[u8; 16] = b"STYX_SCHEMA_V2\0\0";

/// 16-byte marker (NUL-padded) that starts a V1 blob.
///
/// The extractor reads a little-endian `u16` entry count after this marker and
/// then that many schema entries. Nothing in this file writes V1 blobs; the
/// format is only read.
pub const MAGIC_V1: &[u8; 16] = b"STYX_SCHEMAS_V1\0";
90
/// Reasons why schema extraction can fail.
///
/// The type is `Clone`, `PartialEq` and `Eq` so that callers and tests can
/// compare errors directly (`assert_eq!(err, ExtractError::NotFound)`) instead
/// of having to pattern-match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExtractError {
    /// No schema could be decoded from the input at all.
    NotFound,
    /// The input ended before a header or payload was complete.
    Truncated,
    /// LZ4 decompression failed or produced an unexpected number of bytes.
    DecompressFailed,
    /// The BLAKE3 hash of the decompressed data did not match the stored hash.
    HashMismatch,
    /// The decompressed data is not valid UTF-8.
    InvalidUtf8,
}
105
106impl std::fmt::Display for ExtractError {
107 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108 match self {
109 ExtractError::NotFound => write!(f, "no embedded styx schemas found"),
110 ExtractError::Truncated => write!(f, "embedded schema data is truncated"),
111 ExtractError::DecompressFailed => write!(f, "LZ4 decompression failed"),
112 ExtractError::HashMismatch => write!(f, "BLAKE3 hash mismatch"),
113 ExtractError::InvalidUtf8 => write!(f, "schema is not valid UTF-8"),
114 }
115 }
116}
117
118impl std::error::Error for ExtractError {}
119
120pub fn compress_schema(schema: &str) -> Vec<u8> {
122 let decompressed = schema.as_bytes();
123 let hash = blake3::hash(decompressed);
124 let compressed = lz4_flex::compress_prepend_size(decompressed);
125
126 let mut blob = Vec::with_capacity(16 + 4 + 4 + 32 + compressed.len());
127 blob.extend_from_slice(MAGIC_V2);
128 blob.extend_from_slice(&(decompressed.len() as u32).to_le_bytes());
129 blob.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
130 blob.extend_from_slice(hash.as_bytes());
131 blob.extend_from_slice(&compressed);
132 blob
133}
134
135pub fn build_embedded_blob(schema: &str) -> Vec<u8> {
137 compress_schema(schema)
138}
139
140pub fn extract_schemas(data: &[u8]) -> Result<Vec<String>, ExtractError> {
147 let mut schemas = Vec::new();
148 let mut search_start = 0;
149
150 while let Some(magic_pos) = find_magic_from(data, search_start, MAGIC_V2) {
152 match try_extract_v2_at(data, magic_pos) {
153 Ok(schema) => {
154 schemas.push(schema);
155 search_start = magic_pos + MAGIC_V2.len();
157 }
158 Err(_) => {
159 search_start = magic_pos + 1;
161 }
162 }
163 }
164
165 search_start = 0;
167 while let Some(magic_pos) = find_magic_from(data, search_start, MAGIC_V1) {
168 match try_extract_v1_at(data, magic_pos) {
169 Ok(mut v1_schemas) => {
170 schemas.append(&mut v1_schemas);
171 search_start = magic_pos + MAGIC_V1.len();
172 }
173 Err(_) => {
174 search_start = magic_pos + 1;
175 }
176 }
177 }
178
179 if schemas.is_empty() {
180 Err(ExtractError::NotFound)
181 } else {
182 Ok(schemas)
183 }
184}
185
186fn try_extract_v2_at(data: &[u8], magic_pos: usize) -> Result<String, ExtractError> {
188 let mut pos = magic_pos + MAGIC_V2.len();
189
190 if pos + 40 > data.len() {
192 return Err(ExtractError::Truncated);
193 }
194
195 let decompressed_len =
196 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
197 pos += 4;
198
199 let compressed_len =
200 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
201 pos += 4;
202
203 let expected_hash: [u8; 32] = data[pos..pos + 32]
204 .try_into()
205 .map_err(|_| ExtractError::Truncated)?;
206 pos += 32;
207
208 if pos + compressed_len > data.len() {
210 return Err(ExtractError::Truncated);
211 }
212 let compressed = &data[pos..pos + compressed_len];
213
214 let decompressed = lz4_flex::decompress_size_prepended(compressed)
216 .map_err(|_| ExtractError::DecompressFailed)?;
217
218 if decompressed.len() != decompressed_len {
220 return Err(ExtractError::DecompressFailed);
221 }
222
223 let actual_hash = blake3::hash(&decompressed);
225 if actual_hash.as_bytes() != &expected_hash {
226 return Err(ExtractError::HashMismatch);
227 }
228
229 String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)
231}
232
233fn try_extract_v1_at(data: &[u8], magic_pos: usize) -> Result<Vec<String>, ExtractError> {
235 let mut pos = magic_pos + MAGIC_V1.len();
236
237 if pos + 2 > data.len() {
239 return Err(ExtractError::Truncated);
240 }
241 let count = u16::from_le_bytes([data[pos], data[pos + 1]]) as usize;
242 pos += 2;
243
244 let mut schemas = Vec::with_capacity(count);
245
246 for _ in 0..count {
247 if pos + 40 > data.len() {
249 return Err(ExtractError::Truncated);
250 }
251
252 let decompressed_len =
253 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
254 pos += 4;
255
256 let compressed_len =
257 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
258 pos += 4;
259
260 let expected_hash: [u8; 32] = data[pos..pos + 32]
261 .try_into()
262 .map_err(|_| ExtractError::Truncated)?;
263 pos += 32;
264
265 if pos + compressed_len > data.len() {
267 return Err(ExtractError::Truncated);
268 }
269 let compressed = &data[pos..pos + compressed_len];
270 pos += compressed_len;
271
272 let decompressed = lz4_flex::decompress_size_prepended(compressed)
274 .map_err(|_| ExtractError::DecompressFailed)?;
275
276 if decompressed.len() != decompressed_len {
278 return Err(ExtractError::DecompressFailed);
279 }
280
281 let actual_hash = blake3::hash(&decompressed);
283 if actual_hash.as_bytes() != &expected_hash {
284 return Err(ExtractError::HashMismatch);
285 }
286
287 let schema = String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)?;
289 schemas.push(schema);
290 }
291
292 Ok(schemas)
293}
294
/// Returns the absolute offset of the first occurrence of `magic` in `data`
/// at or after `start`, or `None` if there is none (including when `start`
/// lies at or beyond the end of `data`).
fn find_magic_from(data: &[u8], start: usize, magic: &[u8; 16]) -> Option<usize> {
    // `get` yields `None` for an out-of-range start and an empty tail for
    // `start == data.len()`; an empty tail has no windows, so both give `None`.
    let haystack = data.get(start..)?;
    let offset = haystack
        .windows(magic.len())
        .position(|candidate| candidate == &magic[..])?;
    Some(start + offset)
}
305
/// Names of the object-file sections and segment that the `find_*_section`
/// helpers look for when locating embedded schemas.
mod section_names {
    /// ELF section name matched by `find_elf_section`.
    pub const ELF: &str = ".styx_schemas";
    /// Mach-O segment whose sections `find_macho_section_in_binary` inspects.
    pub const MACHO_SEGMENT: &str = "__DATA";
    /// Mach-O section name matched inside `MACHO_SEGMENT`.
    pub const MACHO_SECTION: &str = "__styx_schemas";
    /// PE section name matched by `find_pe_section`.
    pub const PE: &str = ".styx";
}
317
318pub fn extract_schemas_from_object(data: &[u8]) -> Result<Vec<String>, ExtractError> {
324 use goblin::Object;
325
326 if let Ok(object) = Object::parse(data)
328 && let Some(section_data) = find_schema_section(&object, data)
329 {
330 return extract_schemas(section_data);
332 }
333
334 extract_schemas(data)
336}
337
338fn find_schema_section<'a>(object: &goblin::Object, data: &'a [u8]) -> Option<&'a [u8]> {
340 use goblin::Object;
341
342 match object {
343 Object::Elf(elf) => find_elf_section(elf, data),
344 Object::Mach(mach) => find_macho_section(mach, data),
345 Object::PE(pe) => find_pe_section(pe, data),
346 _ => None,
347 }
348}
349
350fn find_elf_section<'a>(elf: &goblin::elf::Elf, data: &'a [u8]) -> Option<&'a [u8]> {
352 for section in &elf.section_headers {
353 if let Some(name) = elf.shdr_strtab.get_at(section.sh_name)
354 && name == section_names::ELF
355 {
356 let start = section.sh_offset as usize;
357 let size = section.sh_size as usize;
358 if start + size <= data.len() {
359 return Some(&data[start..start + size]);
360 }
361 }
362 }
363 None
364}
365
366fn find_macho_section<'a>(mach: &goblin::mach::Mach, data: &'a [u8]) -> Option<&'a [u8]> {
368 use goblin::mach::Mach;
369
370 match mach {
371 Mach::Binary(macho) => find_macho_section_in_binary(macho, data),
372 Mach::Fat(fat) => {
373 for arch in fat.iter_arches().flatten() {
375 let start = arch.offset as usize;
376 let size = arch.size as usize;
377 if start + size <= data.len() {
378 let arch_data = &data[start..start + size];
379 if let Ok(goblin::Object::Mach(Mach::Binary(macho))) =
380 goblin::Object::parse(arch_data)
381 && let Some(section) = find_macho_section_in_binary(&macho, arch_data)
382 {
383 return Some(section);
384 }
385 }
386 }
387 None
388 }
389 }
390}
391
392fn find_macho_section_in_binary<'a>(
394 macho: &goblin::mach::MachO,
395 data: &'a [u8],
396) -> Option<&'a [u8]> {
397 for segment in &macho.segments {
398 if let Ok(name) = segment.name()
399 && name == section_names::MACHO_SEGMENT
400 {
401 for (section, _section_data) in segment.sections().ok()? {
402 if let Ok(sect_name) = section.name()
403 && sect_name == section_names::MACHO_SECTION
404 {
405 let start = section.offset as usize;
406 let size = section.size as usize;
407 if start + size <= data.len() {
408 return Some(&data[start..start + size]);
409 }
410 }
411 }
412 }
413 }
414 None
415}
416
417fn find_pe_section<'a>(pe: &goblin::pe::PE, data: &'a [u8]) -> Option<&'a [u8]> {
419 for section in &pe.sections {
420 if let Ok(name) = section.name()
421 && name == section_names::PE
422 {
423 let start = section.pointer_to_raw_data as usize;
424 let size = section.size_of_raw_data as usize;
425 if start + size <= data.len() {
426 return Some(&data[start..start + size]);
427 }
428 }
429 }
430 None
431}
432
433pub fn extract_schemas_from_file(
438 path: &std::path::Path,
439) -> Result<Vec<String>, Box<dyn std::error::Error>> {
440 use std::fs::File;
441 let file = File::open(path)?;
442 let mmap = unsafe { memmap2::Mmap::map(&file) }?;
443 Ok(extract_schemas_from_object(&mmap)?)
444}
445
#[cfg(test)]
mod tests {
    use super::*;

    /// A schema survives `build_embedded_blob` followed by `extract_schemas`.
    #[test]
    fn roundtrip_single_schema_v2() {
        let schema = r#"meta {
    id test-schema
    version 1.0.0
}

schema {
    @ @object{
        name @string
        port @int
    }
}
"#;

        let recovered = extract_schemas(&build_embedded_blob(schema)).unwrap();

        assert_eq!(recovered, [schema]);
    }

    /// Two blobs placed back to back are both found, in input order.
    #[test]
    fn multiple_v2_blobs() {
        let schema1 = "meta { id s1, version 1.0.0 }\nschema { @ @string }";
        let schema2 = "meta { id s2, version 2.0.0 }\nschema { @ @int }";

        let data = [build_embedded_blob(schema1), build_embedded_blob(schema2)].concat();

        assert_eq!(extract_schemas(&data).unwrap(), [schema1, schema2]);
    }

    /// A buffer without any magic marker yields `NotFound`.
    #[test]
    fn not_found_in_random_data() {
        let zeros = [0u8; 1000];

        let outcome = extract_schemas(&zeros);

        assert!(matches!(outcome, Err(ExtractError::NotFound)));
    }

    /// A blob is found even when surrounded by unrelated bytes.
    #[test]
    fn embedded_in_larger_binary() {
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @bool }";

        // Fake header, 1000 bytes of padding, the blob, then 500 trailing bytes.
        let mut binary: Vec<u8> = vec![0xDE, 0xAD, 0xBE, 0xEF];
        binary.resize(binary.len() + 1000, 0);
        binary.extend(build_embedded_blob(schema));
        binary.resize(binary.len() + 500, 0);

        assert_eq!(extract_schemas(&binary).unwrap(), [schema]);
    }

    /// Corrupting the stored hash makes the only candidate fail, so the scan
    /// as a whole reports `NotFound` (per-candidate errors are not surfaced).
    #[test]
    fn hash_mismatch_detected() {
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @unit }";
        let mut blob = build_embedded_blob(schema);

        // The hash follows the magic and the two u32 length fields.
        let first_hash_byte = MAGIC_V2.len() + 4 + 4;
        blob[first_hash_byte] ^= 0xFF;

        let outcome = extract_schemas(&blob);

        assert!(matches!(outcome, Err(ExtractError::NotFound)));
    }
}