#![doc = include_str!("../README.md")]
pub use styx_embed_macros::{
embed_file, embed_files, embed_inline, embed_outdir_file, embed_schema, embed_schemas,
};
/// Magic marker that prefixes a current (V2) embedded schema blob.
/// Layout after the magic: u32 LE decompressed length, u32 LE compressed
/// length, 32-byte BLAKE3 hash of the decompressed bytes, LZ4 payload.
pub const MAGIC_V2: &[u8; 16] = b"STYX_SCHEMA_V2\0\0";
/// Magic marker that prefixes a legacy (V1) container: a u16 LE schema
/// count followed by that many (u32 lengths, 32-byte hash, LZ4 payload)
/// records.
pub const MAGIC_V1: &[u8; 16] = b"STYX_SCHEMAS_V1\0";
/// Errors produced while locating and decoding embedded schema blobs.
#[derive(Debug)]
pub enum ExtractError {
    /// No blob decoded successfully anywhere in the scanned data.
    NotFound,
    /// A magic marker was found but its header or payload runs past the
    /// end of the available data.
    Truncated,
    /// LZ4 decompression failed, or the decompressed length did not match
    /// the length recorded in the blob header.
    DecompressFailed,
    /// BLAKE3 hash of the decompressed bytes did not match the stored hash.
    HashMismatch,
    /// The decompressed bytes were not valid UTF-8.
    InvalidUtf8,
}
impl std::fmt::Display for ExtractError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ExtractError::NotFound => write!(f, "no embedded styx schemas found"),
ExtractError::Truncated => write!(f, "embedded schema data is truncated"),
ExtractError::DecompressFailed => write!(f, "LZ4 decompression failed"),
ExtractError::HashMismatch => write!(f, "BLAKE3 hash mismatch"),
ExtractError::InvalidUtf8 => write!(f, "schema is not valid UTF-8"),
}
}
}
impl std::error::Error for ExtractError {}
/// Serialize `schema` into a self-describing V2 blob:
/// `MAGIC_V2 | u32 LE decompressed len | u32 LE compressed len |
/// 32-byte BLAKE3 hash of the decompressed bytes | LZ4 payload`.
///
/// # Panics
/// Panics if the schema or its compressed form exceeds `u32::MAX` bytes,
/// which the blob header cannot represent. (The previous `as u32` casts
/// silently truncated in that case, producing a blob that could never be
/// extracted.)
pub fn compress_schema(schema: &str) -> Vec<u8> {
    let decompressed = schema.as_bytes();
    let hash = blake3::hash(decompressed);
    let compressed = lz4_flex::compress_prepend_size(decompressed);
    // Fail loudly instead of truncating lengths the format cannot encode.
    let decompressed_len =
        u32::try_from(decompressed.len()).expect("schema larger than u32::MAX bytes");
    let compressed_len =
        u32::try_from(compressed.len()).expect("compressed schema larger than u32::MAX bytes");
    let mut blob = Vec::with_capacity(16 + 4 + 4 + 32 + compressed.len());
    blob.extend_from_slice(MAGIC_V2);
    blob.extend_from_slice(&decompressed_len.to_le_bytes());
    blob.extend_from_slice(&compressed_len.to_le_bytes());
    blob.extend_from_slice(hash.as_bytes());
    blob.extend_from_slice(&compressed);
    blob
}
/// Build the byte blob that gets embedded into binaries.
/// Currently an alias for [`compress_schema`] (always emits the V2 format).
pub fn build_embedded_blob(schema: &str) -> Vec<u8> {
    compress_schema(schema)
}
/// Scan `data` for every embedded schema blob (V2 first, then legacy V1)
/// and return the decoded schema strings.
///
/// A magic marker that fails to decode is treated as a false positive: the
/// scan resumes one byte past it. Returns [`ExtractError::NotFound`] when
/// no blob decodes successfully.
pub fn extract_schemas(data: &[u8]) -> Result<Vec<String>, ExtractError> {
    let mut found = Vec::new();

    // Pass 1: V2 blobs, one schema each.
    let mut cursor = 0;
    while let Some(at) = find_magic_from(data, cursor, MAGIC_V2) {
        if let Ok(schema) = try_extract_v2_at(data, at) {
            found.push(schema);
            cursor = at + MAGIC_V2.len();
        } else {
            // Corrupt or spurious marker: skip a single byte and rescan.
            cursor = at + 1;
        }
    }

    // Pass 2: legacy V1 containers, each possibly holding several schemas.
    cursor = 0;
    while let Some(at) = find_magic_from(data, cursor, MAGIC_V1) {
        if let Ok(mut batch) = try_extract_v1_at(data, at) {
            found.append(&mut batch);
            cursor = at + MAGIC_V1.len();
        } else {
            cursor = at + 1;
        }
    }

    if found.is_empty() {
        Err(ExtractError::NotFound)
    } else {
        Ok(found)
    }
}
/// Decode a single V2 blob whose magic starts at `magic_pos`.
/// Header layout after the magic: u32 LE decompressed length,
/// u32 LE compressed length, 32-byte BLAKE3 hash of the decompressed bytes,
/// then the LZ4 payload.
fn try_extract_v2_at(data: &[u8], magic_pos: usize) -> Result<String, ExtractError> {
    let mut pos = magic_pos + MAGIC_V2.len();
    if pos + 40 > data.len() {
        return Err(ExtractError::Truncated);
    }
    let decompressed_len =
        u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
    pos += 4;
    let compressed_len =
        u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
    pos += 4;
    let expected_hash: [u8; 32] = data[pos..pos + 32]
        .try_into()
        .map_err(|_| ExtractError::Truncated)?;
    pos += 32;
    // `compressed_len` is untrusted: on 32-bit targets the unchecked sum
    // `pos + compressed_len` could overflow usize (panic in debug, wrap in
    // release). checked_add turns that into a clean Truncated error.
    let end = pos
        .checked_add(compressed_len)
        .filter(|&end| end <= data.len())
        .ok_or(ExtractError::Truncated)?;
    let compressed = &data[pos..end];
    let decompressed = lz4_flex::decompress_size_prepended(compressed)
        .map_err(|_| ExtractError::DecompressFailed)?;
    // The header length must agree with what LZ4 actually produced.
    if decompressed.len() != decompressed_len {
        return Err(ExtractError::DecompressFailed);
    }
    let actual_hash = blake3::hash(&decompressed);
    if actual_hash.as_bytes() != &expected_hash {
        return Err(ExtractError::HashMismatch);
    }
    String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)
}
/// Decode a legacy V1 container whose magic starts at `magic_pos`.
/// Layout after the magic: u16 LE schema count, then `count` records of
/// (u32 LE decompressed length, u32 LE compressed length, 32-byte BLAKE3
/// hash, LZ4 payload).
fn try_extract_v1_at(data: &[u8], magic_pos: usize) -> Result<Vec<String>, ExtractError> {
    let mut pos = magic_pos + MAGIC_V1.len();
    if pos + 2 > data.len() {
        return Err(ExtractError::Truncated);
    }
    let count = u16::from_le_bytes([data[pos], data[pos + 1]]) as usize;
    pos += 2;
    let mut schemas = Vec::with_capacity(count);
    for _ in 0..count {
        if pos + 40 > data.len() {
            return Err(ExtractError::Truncated);
        }
        let decompressed_len =
            u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
        pos += 4;
        let compressed_len =
            u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
        pos += 4;
        let expected_hash: [u8; 32] = data[pos..pos + 32]
            .try_into()
            .map_err(|_| ExtractError::Truncated)?;
        pos += 32;
        // `compressed_len` is untrusted: on 32-bit targets the unchecked sum
        // `pos + compressed_len` could overflow usize instead of reporting
        // Truncated. checked_add closes that panic path.
        let end = pos
            .checked_add(compressed_len)
            .filter(|&end| end <= data.len())
            .ok_or(ExtractError::Truncated)?;
        let compressed = &data[pos..end];
        pos = end;
        let decompressed = lz4_flex::decompress_size_prepended(compressed)
            .map_err(|_| ExtractError::DecompressFailed)?;
        // The header length must agree with what LZ4 actually produced.
        if decompressed.len() != decompressed_len {
            return Err(ExtractError::DecompressFailed);
        }
        let actual_hash = blake3::hash(&decompressed);
        if actual_hash.as_bytes() != &expected_hash {
            return Err(ExtractError::HashMismatch);
        }
        let schema = String::from_utf8(decompressed).map_err(|_| ExtractError::InvalidUtf8)?;
        schemas.push(schema);
    }
    Ok(schemas)
}
/// Find the next occurrence of `magic` in `data` at or after `start`,
/// returning its absolute offset, or `None` if it does not occur.
fn find_magic_from(data: &[u8], start: usize, magic: &[u8; 16]) -> Option<usize> {
    // An out-of-range start simply means there is nothing left to scan.
    let tail = data.get(start..)?;
    tail.windows(magic.len())
        .position(|window| window == magic)
        .map(|offset| start + offset)
}
/// Section/segment names searched for embedded schema blobs in each
/// supported object-file format.
mod section_names {
    /// ELF section name.
    pub const ELF: &str = ".styx_schemas";
    /// Mach-O segment that hosts the schema section.
    pub const MACHO_SEGMENT: &str = "__DATA";
    /// Mach-O section name within [`MACHO_SEGMENT`].
    pub const MACHO_SECTION: &str = "__styx_schemas";
    /// PE section name.
    pub const PE: &str = ".styx";
}
pub fn extract_schemas_from_object(data: &[u8]) -> Result<Vec<String>, ExtractError> {
use goblin::Object;
if let Ok(object) = Object::parse(data)
&& let Some(section_data) = find_schema_section(&object, data)
{
return extract_schemas(section_data);
}
extract_schemas(data)
}
/// Dispatch to the format-specific section lookup for the parsed object.
/// Returns `None` for formats without a schema-section convention here.
fn find_schema_section<'a>(object: &goblin::Object, data: &'a [u8]) -> Option<&'a [u8]> {
    match object {
        goblin::Object::Elf(elf) => find_elf_section(elf, data),
        goblin::Object::Mach(mach) => find_macho_section(mach, data),
        goblin::Object::PE(pe) => find_pe_section(pe, data),
        _ => None,
    }
}
/// Locate the raw bytes of the `.styx_schemas` section in an ELF image,
/// if it exists and lies fully within `data`.
fn find_elf_section<'a>(elf: &goblin::elf::Elf, data: &'a [u8]) -> Option<&'a [u8]> {
    for section in &elf.section_headers {
        if let Some(name) = elf.shdr_strtab.get_at(section.sh_name)
            && name == section_names::ELF
        {
            // sh_offset/sh_size are untrusted u64 fields: `as usize` would
            // silently truncate on 32-bit targets, and the unchecked sum
            // could overflow usize. Skip implausible headers instead of
            // panicking or slicing out of bounds.
            if let (Ok(start), Ok(size)) = (
                usize::try_from(section.sh_offset),
                usize::try_from(section.sh_size),
            ) && let Some(end) = start.checked_add(size)
                && end <= data.len()
            {
                return Some(&data[start..end]);
            }
        }
    }
    None
}
/// Locate the schema section in a Mach-O image. For fat (multi-arch)
/// binaries, each contained architecture is searched in turn and the first
/// match wins.
fn find_macho_section<'a>(mach: &goblin::mach::Mach, data: &'a [u8]) -> Option<&'a [u8]> {
    use goblin::mach::Mach;
    match mach {
        Mach::Binary(macho) => find_macho_section_in_binary(macho, data),
        Mach::Fat(fat) => {
            for arch in fat.iter_arches().flatten() {
                let start = arch.offset as usize;
                // checked_add: the fat-arch header fields are untrusted;
                // an unchecked `start + size` could overflow usize and
                // panic instead of skipping the bogus entry.
                let Some(end) = start.checked_add(arch.size as usize) else {
                    continue;
                };
                if end > data.len() {
                    continue;
                }
                let arch_data = &data[start..end];
                if let Ok(goblin::Object::Mach(Mach::Binary(macho))) =
                    goblin::Object::parse(arch_data)
                    && let Some(section) = find_macho_section_in_binary(&macho, arch_data)
                {
                    return Some(section);
                }
            }
            None
        }
    }
}
/// Locate the `__DATA,__styx_schemas` section bytes inside a single
/// (non-fat) Mach-O binary, if present and fully contained within `data`.
fn find_macho_section_in_binary<'a>(
    macho: &goblin::mach::MachO,
    data: &'a [u8],
) -> Option<&'a [u8]> {
    for segment in &macho.segments {
        if let Ok(name) = segment.name()
            && name == section_names::MACHO_SEGMENT
        {
            // A segment whose section table fails to parse should not abort
            // the search of the remaining segments (the previous `.ok()?`
            // returned None for the whole lookup).
            let Ok(sections) = segment.sections() else {
                continue;
            };
            for (section, _section_data) in sections {
                if let Ok(sect_name) = section.name()
                    && sect_name == section_names::MACHO_SECTION
                {
                    // offset is u32 (lossless as usize); size is untrusted
                    // u64 — use try_from + checked_add so a hostile header
                    // is skipped rather than truncated or overflowing.
                    let start = section.offset as usize;
                    if let Ok(size) = usize::try_from(section.size)
                        && let Some(end) = start.checked_add(size)
                        && end <= data.len()
                    {
                        return Some(&data[start..end]);
                    }
                }
            }
        }
    }
    None
}
/// Locate the raw bytes of the `.styx` section in a PE image, if it exists
/// and lies fully within `data`.
fn find_pe_section<'a>(pe: &goblin::pe::PE, data: &'a [u8]) -> Option<&'a [u8]> {
    for section in &pe.sections {
        if let Ok(name) = section.name()
            && name == section_names::PE
        {
            let start = section.pointer_to_raw_data as usize;
            let size = section.size_of_raw_data as usize;
            // checked_add: these are untrusted u32 header fields; on 32-bit
            // targets the unchecked sum could overflow usize and panic
            // instead of skipping the bogus section.
            if let Some(end) = start.checked_add(size)
                && end <= data.len()
            {
                return Some(&data[start..end]);
            }
        }
    }
    None
}
/// Memory-map the object file at `path` and extract every embedded schema
/// from it.
///
/// # Errors
/// Returns an error if the file cannot be opened or mapped, or if no
/// schemas can be extracted from its contents.
pub fn extract_schemas_from_file(
    path: &std::path::Path,
) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    let file = std::fs::File::open(path)?;
    // SAFETY: the mapping is read-only and dropped before this function
    // returns. NOTE(review): mmap soundness still assumes the file is not
    // truncated/modified concurrently by another process — confirm callers
    // accept that risk.
    let mapped = unsafe { memmap2::Mmap::map(&file) }?;
    let schemas = extract_schemas_from_object(&mapped)?;
    Ok(schemas)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn roundtrip_single_schema_v2() {
        // Compress-then-extract must return the schema text unchanged.
        let schema = r#"meta {
id test-schema
version 1.0.0
}
schema {
@ @object{
name @string
port @int
}
}
"#;
        let blob = build_embedded_blob(schema);
        let recovered = extract_schemas(&blob).unwrap();
        assert_eq!(recovered, [schema]);
    }

    #[test]
    fn multiple_v2_blobs() {
        // Two concatenated blobs must both be found, in order.
        let schema1 = "meta { id s1, version 1.0.0 }\nschema { @ @string }";
        let schema2 = "meta { id s2, version 2.0.0 }\nschema { @ @int }";
        let mut data = build_embedded_blob(schema1);
        data.extend(build_embedded_blob(schema2));
        let recovered = extract_schemas(&data).unwrap();
        assert_eq!(recovered, [schema1, schema2]);
    }

    #[test]
    fn not_found_in_random_data() {
        // A buffer of zeros contains no magic marker at all.
        let data = vec![0u8; 1000];
        assert!(matches!(
            extract_schemas(&data),
            Err(ExtractError::NotFound)
        ));
    }

    #[test]
    fn embedded_in_larger_binary() {
        // The blob must still be located when surrounded by unrelated bytes.
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @bool }";
        let mut binary = vec![0xDE, 0xAD, 0xBE, 0xEF];
        binary.extend_from_slice(&[0u8; 1000]);
        binary.extend_from_slice(&build_embedded_blob(schema));
        binary.extend_from_slice(&[0u8; 500]);
        let recovered = extract_schemas(&binary).unwrap();
        assert_eq!(recovered, [schema]);
    }

    #[test]
    fn hash_mismatch_detected() {
        let schema = "meta { id test, version 1.0.0 }\nschema { @ @unit }";
        let mut blob = build_embedded_blob(schema);
        // Corrupt one byte of the stored hash; the blob no longer decodes,
        // so the public API reports NotFound (per-blob errors are swallowed).
        let hash_offset = MAGIC_V2.len() + 4 + 4;
        blob[hash_offset] ^= 0xFF;
        assert!(matches!(
            extract_schemas(&blob),
            Err(ExtractError::NotFound)
        ));
    }
}