1pub use styx_embed_macros::{
79 embed_file, embed_files, embed_inline, embed_outdir_file, embed_schema, embed_schemas,
80};
81
/// 16-byte marker that starts a V2 blob (one schema per blob).
///
/// `compress_schema` writes it first; it is followed by the decompressed
/// length (u32 LE), the compressed length (u32 LE), a 32-byte BLAKE3 hash and
/// the size-prefixed LZ4 payload.
pub const MAGIC_V2: &[u8; 16] = b"STYX_SCHEMA_V2\0\0";

/// 16-byte marker that starts a V1 blob (several schemas per blob).
///
/// The extractor expects it to be followed by a u16 LE entry count and then
/// that many entries, each laid out like the part of a V2 blob after the magic.
pub const MAGIC_V1: &[u8; 16] = b"STYX_SCHEMAS_V1\0";
89
/// Reasons why embedded schemas could not be extracted.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers and tests can compare
/// errors directly (e.g. with `assert_eq!`) instead of going through `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExtractError {
    /// No schema could be extracted from the input.
    NotFound,
    /// The input ends before a complete header or payload.
    Truncated,
    /// LZ4 decompression failed, or produced a length different from the one
    /// recorded in the header.
    DecompressFailed,
    /// The BLAKE3 hash of the decompressed data does not match the stored hash.
    HashMismatch,
    /// The decompressed data is not valid UTF-8.
    InvalidUtf8,
}
104
105impl std::fmt::Display for ExtractError {
106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107 match self {
108 ExtractError::NotFound => write!(f, "no embedded styx schemas found"),
109 ExtractError::Truncated => write!(f, "embedded schema data is truncated"),
110 ExtractError::DecompressFailed => write!(f, "LZ4 decompression failed"),
111 ExtractError::HashMismatch => write!(f, "BLAKE3 hash mismatch"),
112 ExtractError::InvalidUtf8 => write!(f, "schema is not valid UTF-8"),
113 }
114 }
115}
116
// Marker impl: lets `ExtractError` be used as `dyn std::error::Error`
// (e.g. boxed via `?` in `extract_schemas_from_file`). No source error is exposed.
impl std::error::Error for ExtractError {}
118
119pub fn compress_schema(schema: &str) -> Vec<u8> {
121 let decompressed = schema.as_bytes();
122 let hash = blake3::hash(decompressed);
123 let compressed = lz4_flex::compress_prepend_size(decompressed);
124
125 let mut blob = Vec::with_capacity(16 + 4 + 4 + 32 + compressed.len());
126 blob.extend_from_slice(MAGIC_V2);
127 blob.extend_from_slice(&(decompressed.len() as u32).to_le_bytes());
128 blob.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
129 blob.extend_from_slice(hash.as_bytes());
130 blob.extend_from_slice(&compressed);
131 blob
132}
133
/// Builds the blob to embed for a single schema.
///
/// Currently this just forwards to `compress_schema`, so the result is one
/// V2 blob starting with `MAGIC_V2`.
pub fn build_embedded_blob(schema: &str) -> Vec<u8> {
    compress_schema(schema)
}
138
139pub fn extract_schemas(data: &[u8]) -> Result<Vec<String>, ExtractError> {
146 let mut schemas = Vec::new();
147 let mut search_start = 0;
148
149 while let Some(magic_pos) = find_magic_from(data, search_start, MAGIC_V2) {
151 match try_extract_v2_at(data, magic_pos) {
152 Ok(schema) => {
153 schemas.push(schema);
154 search_start = magic_pos + MAGIC_V2.len();
156 }
157 Err(_) => {
158 search_start = magic_pos + 1;
160 }
161 }
162 }
163
164 search_start = 0;
166 while let Some(magic_pos) = find_magic_from(data, search_start, MAGIC_V1) {
167 match try_extract_v1_at(data, magic_pos) {
168 Ok(mut v1_schemas) => {
169 schemas.append(&mut v1_schemas);
170 search_start = magic_pos + MAGIC_V1.len();
171 }
172 Err(_) => {
173 search_start = magic_pos + 1;
174 }
175 }
176 }
177
178 if schemas.is_empty() {
179 Err(ExtractError::NotFound)
180 } else {
181 Ok(schemas)
182 }
183}
184
185fn try_extract_v2_at(data: &[u8], magic_pos: usize) -> Result<String, ExtractError> {
187 let mut pos = magic_pos + MAGIC_V2.len();
188
189 if pos + 40 > data.len() {
191 return Err(ExtractError::Truncated);
192 }
193
194 let decompressed_len =
195 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
196 pos += 4;
197
198 let compressed_len =
199 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
200 pos += 4;
201
202 let expected_hash: [u8; 32] = data[pos..pos + 32]
203 .try_into()
204 .map_err(|_| ExtractError::Truncated)?;
205 pos += 32;
206
207 if pos + compressed_len > data.len() {
209 return Err(ExtractError::Truncated);
210 }
211 let compressed = &data[pos..pos + compressed_len];
212
213 let decompressed = lz4_flex::decompress_size_prepended(compressed)
215 .map_err(|_| ExtractError::DecompressFailed)?;
216
217 if decompressed.len() != decompressed_len {
219 return Err(ExtractError::DecompressFailed);
220 }
221
222 let actual_hash = blake3::hash(&decompressed);
224 if actual_hash.as_bytes() != &expected_hash {
225 return Err(ExtractError::HashMismatch);
226 }
227
228 String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)
230}
231
232fn try_extract_v1_at(data: &[u8], magic_pos: usize) -> Result<Vec<String>, ExtractError> {
234 let mut pos = magic_pos + MAGIC_V1.len();
235
236 if pos + 2 > data.len() {
238 return Err(ExtractError::Truncated);
239 }
240 let count = u16::from_le_bytes([data[pos], data[pos + 1]]) as usize;
241 pos += 2;
242
243 let mut schemas = Vec::with_capacity(count);
244
245 for _ in 0..count {
246 if pos + 40 > data.len() {
248 return Err(ExtractError::Truncated);
249 }
250
251 let decompressed_len =
252 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
253 pos += 4;
254
255 let compressed_len =
256 u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
257 pos += 4;
258
259 let expected_hash: [u8; 32] = data[pos..pos + 32]
260 .try_into()
261 .map_err(|_| ExtractError::Truncated)?;
262 pos += 32;
263
264 if pos + compressed_len > data.len() {
266 return Err(ExtractError::Truncated);
267 }
268 let compressed = &data[pos..pos + compressed_len];
269 pos += compressed_len;
270
271 let decompressed = lz4_flex::decompress_size_prepended(compressed)
273 .map_err(|_| ExtractError::DecompressFailed)?;
274
275 if decompressed.len() != decompressed_len {
277 return Err(ExtractError::DecompressFailed);
278 }
279
280 let actual_hash = blake3::hash(&decompressed);
282 if actual_hash.as_bytes() != &expected_hash {
283 return Err(ExtractError::HashMismatch);
284 }
285
286 let schema = String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)?;
288 schemas.push(schema);
289 }
290
291 Ok(schemas)
292}
293
/// Returns the absolute index of the first occurrence of `magic` in `data`
/// at or after `start`, or `None` if there is none (including when `start`
/// is at or beyond the end of `data`).
fn find_magic_from(data: &[u8], start: usize, magic: &[u8; 16]) -> Option<usize> {
    // `get` yields `None` for an out-of-range start instead of panicking.
    let tail = data.get(start..)?;
    let offset = tail
        .windows(magic.len())
        .position(|candidate| candidate == &magic[..])?;
    Some(start + offset)
}
304
/// Names of the object-file sections that are searched for embedded schemas,
/// one per supported container format.
mod section_names {
    /// Section name looked up in ELF files.
    pub const ELF: &str = ".styx_schemas";
    /// Mach-O segment expected to contain the schema section.
    pub const MACHO_SEGMENT: &str = "__DATA";
    /// Section name looked up inside `MACHO_SEGMENT` in Mach-O files.
    pub const MACHO_SECTION: &str = "__styx_schemas";
    /// Section name looked up in PE files.
    // NOTE(review): presumably shorter than the others because PE section
    // names are limited to 8 bytes — confirm.
    pub const PE: &str = ".styx";
}
316
317pub fn extract_schemas_from_object(data: &[u8]) -> Result<Vec<String>, ExtractError> {
323 use goblin::Object;
324
325 if let Ok(object) = Object::parse(data)
327 && let Some(section_data) = find_schema_section(&object, data)
328 {
329 return extract_schemas(section_data);
331 }
332
333 extract_schemas(data)
335}
336
337fn find_schema_section<'a>(object: &goblin::Object, data: &'a [u8]) -> Option<&'a [u8]> {
339 use goblin::Object;
340
341 match object {
342 Object::Elf(elf) => find_elf_section(elf, data),
343 Object::Mach(mach) => find_macho_section(mach, data),
344 Object::PE(pe) => find_pe_section(pe, data),
345 _ => None,
346 }
347}
348
349fn find_elf_section<'a>(elf: &goblin::elf::Elf, data: &'a [u8]) -> Option<&'a [u8]> {
351 for section in &elf.section_headers {
352 if let Some(name) = elf.shdr_strtab.get_at(section.sh_name)
353 && name == section_names::ELF
354 {
355 let start = section.sh_offset as usize;
356 let size = section.sh_size as usize;
357 if start + size <= data.len() {
358 return Some(&data[start..start + size]);
359 }
360 }
361 }
362 None
363}
364
365fn find_macho_section<'a>(mach: &goblin::mach::Mach, data: &'a [u8]) -> Option<&'a [u8]> {
367 use goblin::mach::Mach;
368
369 match mach {
370 Mach::Binary(macho) => find_macho_section_in_binary(macho, data),
371 Mach::Fat(fat) => {
372 for arch in fat.iter_arches().flatten() {
374 let start = arch.offset as usize;
375 let size = arch.size as usize;
376 if start + size <= data.len() {
377 let arch_data = &data[start..start + size];
378 if let Ok(goblin::Object::Mach(Mach::Binary(macho))) =
379 goblin::Object::parse(arch_data)
380 && let Some(section) = find_macho_section_in_binary(&macho, arch_data)
381 {
382 return Some(section);
383 }
384 }
385 }
386 None
387 }
388 }
389}
390
391fn find_macho_section_in_binary<'a>(
393 macho: &goblin::mach::MachO,
394 data: &'a [u8],
395) -> Option<&'a [u8]> {
396 for segment in &macho.segments {
397 if let Ok(name) = segment.name()
398 && name == section_names::MACHO_SEGMENT
399 {
400 for (section, _section_data) in segment.sections().ok()? {
401 if let Ok(sect_name) = section.name()
402 && sect_name == section_names::MACHO_SECTION
403 {
404 let start = section.offset as usize;
405 let size = section.size as usize;
406 if start + size <= data.len() {
407 return Some(&data[start..start + size]);
408 }
409 }
410 }
411 }
412 }
413 None
414}
415
416fn find_pe_section<'a>(pe: &goblin::pe::PE, data: &'a [u8]) -> Option<&'a [u8]> {
418 for section in &pe.sections {
419 if let Ok(name) = section.name()
420 && name == section_names::PE
421 {
422 let start = section.pointer_to_raw_data as usize;
423 let size = section.size_of_raw_data as usize;
424 if start + size <= data.len() {
425 return Some(&data[start..start + size]);
426 }
427 }
428 }
429 None
430}
431
432pub fn extract_schemas_from_file(
437 path: &std::path::Path,
438) -> Result<Vec<String>, Box<dyn std::error::Error>> {
439 use std::fs::File;
440 let file = File::open(path)?;
441 let mmap = unsafe { memmap2::Mmap::map(&file) }?;
442 Ok(extract_schemas_from_object(&mmap)?)
443}
444
#[cfg(test)]
mod tests {
    use super::*;

    /// One schema survives build → extract unchanged.
    #[test]
    fn roundtrip_single_schema_v2() {
        let schema = r#"meta {
    id test-schema
    version 1.0.0
}

schema {
    @ @object{
        name @string
        port @int
    }
}
"#;

        let extracted = extract_schemas(&build_embedded_blob(schema)).unwrap();

        assert_eq!(extracted, [schema]);
    }

    /// Back-to-back V2 blobs are all found, in order.
    #[test]
    fn multiple_v2_blobs() {
        let schemas = [
            "meta { id s1, version 1.0.0 }\nschema { @ @string }",
            "meta { id s2, version 2.0.0 }\nschema { @ @int }",
        ];

        let data: Vec<u8> = schemas
            .iter()
            .flat_map(|schema| build_embedded_blob(schema))
            .collect();

        assert_eq!(extract_schemas(&data).unwrap(), schemas);
    }

    /// Input without any magic yields `NotFound`.
    #[test]
    fn not_found_in_random_data() {
        let data = [0u8; 1000];

        let result = extract_schemas(&data);

        assert!(matches!(result, Err(ExtractError::NotFound)));
    }

    /// A blob surrounded by unrelated bytes is still located.
    #[test]
    fn embedded_in_larger_binary() {
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @bool }";

        // 4 marker bytes + 1000 zero bytes in front of the blob, 500 zero bytes after it.
        let mut binary: Vec<u8> = vec![0xDE, 0xAD, 0xBE, 0xEF];
        binary.resize(binary.len() + 1000, 0);
        binary.extend(build_embedded_blob(schema));
        binary.resize(binary.len() + 500, 0);

        let extracted = extract_schemas(&binary).unwrap();

        assert_eq!(extracted, [schema]);
    }

    /// A blob with a corrupted hash is skipped, so nothing is found.
    #[test]
    fn hash_mismatch_detected() {
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @unit }";
        let mut blob = build_embedded_blob(schema);

        // The hash starts after the magic and the two u32 length fields.
        let first_hash_byte = MAGIC_V2.len() + 2 * std::mem::size_of::<u32>();
        blob[first_hash_byte] = !blob[first_hash_byte];

        // The scanner swallows the per-blob error, so the caller sees `NotFound`.
        let result = extract_schemas(&blob);
        assert!(matches!(result, Err(ExtractError::NotFound)));
    }
}