pub use config::{
DecompositionRules, DEFAULT_BEAM_WIDTH, DEFAULT_MAX_DEPTH, DEFAULT_MAX_WITNESSES,
};
pub use error::DecompositionError;
pub use finding::{DecompositionFinding, DecompositionReport};
pub use search::BeamSearch;
pub use search_trait::DecompositionSearch;
use crate::spec::types::OpSpec;
#[inline]
pub fn enforce_registry(specs: &[OpSpec]) -> DecompositionReport {
enforce_registry_with_rules(specs, &DecompositionRules::default(), &BeamSearch)
}
#[inline]
pub fn enforce_registry_with_rules(
specs: &[OpSpec],
rules: &DecompositionRules,
search: &dyn DecompositionSearch,
) -> DecompositionReport {
let mut report = DecompositionReport::default();
if let Err(message) = rules.validate() {
report.errors.push(message);
return report;
}
for spec in specs {
match search.search(spec, specs, rules) {
Ok(Some(finding)) => report.findings.push(finding),
Ok(None) => {}
Err(err) => report.errors.push(format!(
"decomposition({}): {err}. Fix: repair CPU references before enforcing LEGO-brick minimality.",
spec.id
)),
}
}
report
}
mod candidate {
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Candidate {
pub repr: String,
pub depth: usize,
pub outputs: Vec<[u8; 4]>,
}
impl Candidate {
#[inline]
pub fn mismatch_count(&self, target: &[[u8; 4]]) -> usize {
self.outputs
.iter()
.zip(target.iter())
.filter(|(left, right)| left != right)
.count()
}
}
}
mod config {
use serde::Deserialize;
pub const DEFAULT_BEAM_WIDTH: usize = 16;
pub const DEFAULT_MAX_DEPTH: usize = 3;
pub const DEFAULT_MAX_WITNESSES: usize = 128;
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct DecompositionRules {
#[serde(default = "default_max_depth")]
pub max_depth: usize,
#[serde(default = "default_beam_width")]
pub beam_width: usize,
#[serde(default = "default_max_witnesses")]
pub max_witnesses: usize,
}
impl Default for DecompositionRules {
fn default() -> Self {
Self {
max_depth: DEFAULT_MAX_DEPTH,
beam_width: DEFAULT_BEAM_WIDTH,
max_witnesses: DEFAULT_MAX_WITNESSES,
}
}
}
impl DecompositionRules {
#[inline]
pub fn from_toml(src: &str) -> Result<Self, String> {
let rules: Self = toml::from_str(src).map_err(|err| {
format!("invalid decomposition TOML: {err}. Fix: use max_depth, beam_width, and max_witnesses keys.")
})?;
rules.validate()?;
Ok(rules)
}
#[inline]
pub fn validate(&self) -> Result<(), String> {
if self.max_depth == 0 || self.max_depth > DEFAULT_MAX_DEPTH {
return Err(format!(
"invalid max_depth {}. Fix: choose a value from 1 through {}.",
self.max_depth, DEFAULT_MAX_DEPTH
));
}
if self.beam_width == 0 {
return Err(
"invalid beam_width 0. Fix: keep at least one candidate per depth.".to_string(),
);
}
if self.max_witnesses == 0 {
return Err(
"invalid max_witnesses 0. Fix: evaluate at least one parity witness."
.to_string(),
);
}
Ok(())
}
}
fn default_max_depth() -> usize {
DEFAULT_MAX_DEPTH
}
fn default_beam_width() -> usize {
DEFAULT_BEAM_WIDTH
}
fn default_max_witnesses() -> usize {
DEFAULT_MAX_WITNESSES
}
}
mod corpus {
use std::collections::BTreeSet;
use crate::generate::generators::{default_generators, InputGenerator};
use crate::spec::OpSpec;
const SEED: u64 = 0x4C45_474F_4252_4943;
#[inline]
pub fn parity_corpus(spec: &OpSpec, max_witnesses: usize) -> Vec<Vec<u8>> {
let mut seen = BTreeSet::new();
let mut corpus = Vec::new();
for generator in default_generators() {
emit_generator(
spec,
generator.as_ref(),
max_witnesses,
&mut seen,
&mut corpus,
);
if corpus.len() >= max_witnesses {
break;
}
}
if corpus.is_empty() {
let zero = vec![0; spec.signature.min_input_bytes()];
if seen.insert(zero.clone()) {
corpus.push(zero);
}
}
corpus
}
fn emit_generator(
spec: &OpSpec,
generator: &dyn InputGenerator,
max_witnesses: usize,
seen: &mut BTreeSet<Vec<u8>>,
corpus: &mut Vec<Vec<u8>>,
) {
if !generator.handles(&spec.signature) {
return;
}
generator.generate_for_op_streaming(spec.id, &spec.signature, SEED, &mut |_, bytes| {
if corpus.len() < max_witnesses
&& bytes.len() >= spec.signature.min_input_bytes()
&& seen.insert(bytes.clone())
{
corpus.push(bytes);
}
});
}
}
mod error {
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DecompositionError {
message: String,
}
impl DecompositionError {
#[inline]
pub fn new(message: impl Into<String>) -> Self {
Self {
message: message.into(),
}
}
}
impl core::fmt::Display for DecompositionError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "{}", self.message)
}
}
impl std::error::Error for DecompositionError {}
}
mod eval {
use std::panic::{catch_unwind, AssertUnwindSafe};
use crate::enforce::enforcers::decomposition::error::DecompositionError;
#[inline]
pub fn eval_u32(
op_id: &str,
cpu_fn: fn(&[u8]) -> Vec<u8>,
input: &[u8],
) -> Result<[u8; 4], DecompositionError> {
let output = catch_unwind(AssertUnwindSafe(|| cpu_fn(input))).map_err(|_| {
DecompositionError::new(format!(
"{op_id} cpu_fn panicked during decomposition search. Fix: make CPU references total over parity corpus inputs."
))
})?;
let slice = output.get(0..4).ok_or_else(|| {
DecompositionError::new(format!(
"{op_id} cpu_fn returned {} byte(s), expected 4. Fix: return one u32 for U32 signatures.",
output.len()
))
})?;
let mut bytes = [0_u8; 4];
bytes.copy_from_slice(slice);
if output.len() != 4 {
return Err(DecompositionError::new(format!(
"{op_id} cpu_fn returned {} byte(s), expected exactly 4. Fix: align expected_output_bytes with the U32 signature.",
output.len()
)));
}
Ok(bytes)
}
#[inline]
pub fn join_binary(left: &[u8; 4], right: &[u8; 4]) -> [u8; 8] {
let mut input = [0_u8; 8];
input[0..4].copy_from_slice(left);
input[4..8].copy_from_slice(right);
input
}
}
mod finding {
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DecompositionFinding {
pub op_id: String,
pub composition: String,
pub depth: usize,
pub witnesses: usize,
}
impl DecompositionFinding {
#[inline]
pub fn message(&self) -> String {
format!(
"{} is a standalone primitive but matches `{}` at depth {} across {} parity witnesses. Fix: remove the standalone primitive or declare it as that composition.",
self.op_id, self.composition, self.depth, self.witnesses
)
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DecompositionReport {
pub findings: Vec<DecompositionFinding>,
pub errors: Vec<String>,
}
impl DecompositionReport {
#[inline]
pub fn passed(&self) -> bool {
self.findings.is_empty() && self.errors.is_empty()
}
#[inline]
pub fn messages(&self) -> Vec<String> {
self.findings
.iter()
.map(DecompositionFinding::message)
.chain(self.errors.iter().cloned())
.collect()
}
}
}
mod op_kind {
use crate::spec::types::DataType;
use crate::spec::OpSpec;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OpKind {
Unary,
Binary,
}
impl OpKind {
#[inline]
pub fn from_spec(spec: &OpSpec) -> Option<Self> {
if spec.signature.output != DataType::U32 {
return None;
}
match spec.signature.inputs.as_slice() {
[DataType::U32] => Some(Self::Unary),
[DataType::U32, DataType::U32] => Some(Self::Binary),
_ => None,
}
}
}
}
mod search {
use std::collections::BTreeSet;
use crate::enforce::enforcers::decomposition::candidate::Candidate;
use crate::enforce::enforcers::decomposition::config::DecompositionRules;
use crate::enforce::enforcers::decomposition::corpus::parity_corpus;
use crate::enforce::enforcers::decomposition::error::DecompositionError;
use crate::enforce::enforcers::decomposition::eval::{eval_u32, join_binary};
use crate::enforce::enforcers::decomposition::finding::DecompositionFinding;
use crate::enforce::enforcers::decomposition::op_kind::OpKind;
use crate::enforce::enforcers::decomposition::search_trait::DecompositionSearch;
use crate::spec::OpSpec;
#[derive(Clone, Debug, Default)]
pub struct BeamSearch;
impl DecompositionSearch for BeamSearch {
fn search(
&self,
target: &OpSpec,
universe: &[OpSpec],
rules: &DecompositionRules,
) -> Result<Option<DecompositionFinding>, DecompositionError> {
rules.validate().map_err(DecompositionError::new)?;
let Some(kind) = OpKind::from_spec(target) else {
return Ok(None);
};
let corpus = parity_corpus(target, rules.max_witnesses);
let target_outputs = target_outputs(target, &corpus)?;
let unary = ops_by_kind(universe, target.id, OpKind::Unary);
let binary = ops_by_kind(universe, target.id, OpKind::Binary);
let mut pool = input_candidates(kind, &corpus)?;
let mut seen = BTreeSet::new();
for candidate in &pool {
seen.insert(candidate.outputs.clone());
}
for depth in 1..=rules.max_depth {
let mut next = Vec::new();
extend_unary(depth, &pool, &unary, &mut seen, &mut next)?;
extend_binary(depth, &pool, &binary, &mut seen, &mut next)?;
if let Some(hit) = exact_match(target, &target_outputs, &next) {
return Ok(Some(DecompositionFinding {
op_id: target.id.to_string(),
composition: hit.repr.clone(),
depth: hit.depth,
witnesses: corpus.len(),
}));
}
next.sort_by_key(|candidate| candidate.mismatch_count(&target_outputs));
next.truncate(rules.beam_width);
pool.extend(next);
}
Ok(None)
}
}
fn target_outputs(
target: &OpSpec,
corpus: &[Vec<u8>],
) -> Result<Vec<[u8; 4]>, DecompositionError> {
corpus
.iter()
.map(|input| eval_u32(target.id, target.cpu_fn, input))
.collect()
}
fn ops_by_kind<'a>(universe: &'a [OpSpec], target_id: &str, kind: OpKind) -> Vec<&'a OpSpec> {
universe
.iter()
.filter(|spec| spec.id != target_id)
.filter(|spec| OpKind::from_spec(spec) == Some(kind))
.collect()
}
fn input_candidates(
kind: OpKind,
corpus: &[Vec<u8>],
) -> Result<Vec<Candidate>, DecompositionError> {
let arity = match kind {
OpKind::Unary => 1,
OpKind::Binary => 2,
};
let mut out = Vec::with_capacity(arity);
for idx in 0..arity {
out.push(input_candidate(idx, corpus)?);
}
Ok(out)
}
fn input_candidate(idx: usize, corpus: &[Vec<u8>]) -> Result<Candidate, DecompositionError> {
let mut outputs = Vec::with_capacity(corpus.len());
let offset = idx * 4;
for input in corpus {
let slice = input.get(offset..offset + 4).ok_or_else(|| {
DecompositionError::new(format!(
"corpus input too short for input {idx}. Fix: route decomposition through parity generators that honor min_input_bytes."
))
})?;
let mut bytes = [0_u8; 4];
bytes.copy_from_slice(slice);
outputs.push(bytes);
}
Ok(Candidate {
repr: format!("x{idx}"),
depth: 0,
outputs,
})
}
fn extend_unary(
depth: usize,
pool: &[Candidate],
ops: &[&OpSpec],
seen: &mut BTreeSet<Vec<[u8; 4]>>,
next: &mut Vec<Candidate>,
) -> Result<(), DecompositionError> {
for op in ops {
for inner in pool.iter().filter(|candidate| candidate.depth + 1 == depth) {
let outputs = apply_unary(op, inner)?;
if seen.insert(outputs.clone()) {
next.push(Candidate {
repr: format!("{}({})", op.id, inner.repr),
depth,
outputs,
});
}
}
}
Ok(())
}
fn extend_binary(
depth: usize,
pool: &[Candidate],
ops: &[&OpSpec],
seen: &mut BTreeSet<Vec<[u8; 4]>>,
next: &mut Vec<Candidate>,
) -> Result<(), DecompositionError> {
for op in ops {
for left in pool {
for right in pool {
if left.depth.max(right.depth) + 1 != depth {
continue;
}
let outputs = apply_binary(op, left, right)?;
if seen.insert(outputs.clone()) {
next.push(Candidate {
repr: format!("{}({}, {})", op.id, left.repr, right.repr),
depth,
outputs,
});
}
}
}
}
Ok(())
}
fn apply_unary(op: &OpSpec, inner: &Candidate) -> Result<Vec<[u8; 4]>, DecompositionError> {
inner
.outputs
.iter()
.map(|bytes| eval_u32(op.id, op.cpu_fn, bytes))
.collect()
}
fn apply_binary(
op: &OpSpec,
left: &Candidate,
right: &Candidate,
) -> Result<Vec<[u8; 4]>, DecompositionError> {
left.outputs
.iter()
.zip(right.outputs.iter())
.map(|(left, right)| {
let input = join_binary(left, right);
eval_u32(op.id, op.cpu_fn, &input)
})
.collect()
}
fn exact_match<'a>(
target: &OpSpec,
expected: &[[u8; 4]],
candidates: &'a [Candidate],
) -> Option<&'a Candidate> {
candidates
.iter()
.find(|candidate| candidate.repr != target.id && candidate.outputs == expected)
}
}
mod search_trait {
use crate::enforce::enforcers::decomposition::config::DecompositionRules;
use crate::enforce::enforcers::decomposition::error::DecompositionError;
use crate::enforce::enforcers::decomposition::finding::DecompositionFinding;
use crate::spec::OpSpec;
pub trait DecompositionSearch {
fn search(
&self,
target: &OpSpec,
universe: &[OpSpec],
rules: &DecompositionRules,
) -> Result<Option<DecompositionFinding>, DecompositionError>;
}
}
pub struct DecompositionEnforcer;
impl crate::enforce::EnforceGate for DecompositionEnforcer {
fn id(&self) -> &'static str {
"decomposition"
}
fn name(&self) -> &'static str {
"decomposition"
}
fn run(&self, ctx: &crate::enforce::EnforceCtx<'_>) -> Vec<crate::enforce::Finding> {
let report = enforce_registry(ctx.specs);
crate::enforce::finding_result(self.id(), report.messages())
}
}
pub const REGISTERED: DecompositionEnforcer = DecompositionEnforcer;