use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::BTreeMap;
use std::fmt::Write as _;
use crate::SearchError;
/// Inclusive `[low, high]` span of commit numbers recorded in a manifest.
///
/// A range whose `high` is below `low` is considered empty (see
/// `CommitRange::is_empty`); manifest validation reports such a range as an error.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CommitRange {
/// Lower bound of the range (inclusive).
pub low: u64,
/// Upper bound of the range (inclusive).
pub high: u64,
}
impl CommitRange {
    /// Number of commits in the inclusive range `[low, high]`.
    ///
    /// Returns `0` when the range is empty (`high < low`). The full range
    /// `0..=u64::MAX` holds `2^64` commits, which does not fit in a `u64`;
    /// in that single case the result saturates at `u64::MAX` instead of
    /// overflowing.
    #[must_use]
    pub const fn len(&self) -> u64 {
        if self.is_empty() {
            return 0;
        }
        // `high >= low` here, so the subtraction cannot underflow. Only the
        // `+ 1` can overflow (when the span is exactly `u64::MAX`), hence the
        // saturating add.
        (self.high - self.low).saturating_add(1)
    }

    /// Returns `true` when the upper bound lies below the lower bound.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.high < self.low
    }
}
/// Describes one embedder revision referenced by a manifest.
///
/// Manifest validation requires `model_name` and `weights_hash` to be
/// non-empty and `dimension` to be non-zero.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EmbedderRevision {
/// Name of the embedding model; must not be empty.
pub model_name: String,
/// Hash identifying the model weights; must not be empty.
/// NOTE(review): the hash algorithm/format is not checked in this file.
pub weights_hash: String,
/// Embedding dimension; must be non-zero.
pub dimension: u32,
/// Quantization format tag for this revision.
pub quantization: QuantizationFormat,
}
/// Quantization format tag stored on an [`EmbedderRevision`].
///
/// NOTE(review): the variant names suggest 32/16-bit floats and 8/4-bit
/// integers; nothing in this file interprets them — confirm against the
/// embedder implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuantizationFormat {
F32,
F16,
Int8,
Int4,
}
/// Manifest entry describing one vector artifact.
///
/// Manifest validation requires a non-empty `path` and `checksum`, a non-zero
/// `dimension`, and that no two vector artifacts share the same `path`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VectorArtifact {
/// Artifact path; must be non-empty and unique among vector artifacts.
/// Repair descriptors refer to artifacts by this value.
pub path: String,
/// Artifact size in bytes (not validated in this file).
pub size_bytes: u64,
/// Artifact checksum; must not be empty.
/// NOTE(review): the checksum algorithm is not checked in this file.
pub checksum: String,
/// Number of vectors in the artifact; summed across artifacts and compared
/// against `GenerationManifest::total_documents` during validation.
pub vector_count: u64,
/// Vector dimension; must be non-zero.
pub dimension: u32,
/// Embedder tier tag for this artifact.
pub embedder_tier: EmbedderTierTag,
}
/// Tier tag attached to a [`VectorArtifact`].
///
/// NOTE(review): presumably distinguishes the fast and quality embedders of a
/// two-tier setup (validation accepts a vector count of 2x the document
/// count); confirm against the search pipeline.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum EmbedderTierTag {
Fast,
Quality,
}
/// Manifest entry describing one lexical artifact.
///
/// Manifest validation requires a non-empty `path` and `checksum`, and that no
/// two lexical artifacts share the same `path`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LexicalArtifact {
/// Artifact path; must be non-empty and unique among lexical artifacts.
pub path: String,
/// Artifact size in bytes (not validated in this file).
pub size_bytes: u64,
/// Artifact checksum; must not be empty.
/// NOTE(review): the checksum algorithm is not checked in this file.
pub checksum: String,
/// Number of documents in the artifact; summed across artifacts and
/// compared against `GenerationManifest::total_documents` during validation.
pub document_count: u64,
}
/// Manifest entry describing repair data attached to an artifact.
///
/// Derives `PartialEq` but not `Eq` because `overhead_ratio` is an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepairDescriptor {
/// Path of the protected artifact; validation requires it to equal the
/// `path` of some vector or lexical artifact in the same manifest.
pub protected_artifact: String,
/// Path of the repair sidecar (not validated in this file).
pub sidecar_path: String,
/// Number of source symbols; validation rejects `0`.
pub source_symbols: u32,
/// Number of repair symbols (not validated in this file).
pub repair_symbols: u32,
/// Overhead ratio; validation warns when it is NaN or outside `[0, 10]`.
pub overhead_ratio: f64,
}
/// A named invariant listed in a manifest's `activation_invariants`.
///
/// Manifest validation only checks the `id` (non-empty, unique); nothing in
/// this file evaluates the invariant itself.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ActivationInvariant {
/// Identifier; must be non-empty and unique within the manifest.
pub id: String,
/// Free-form description (not validated in this file).
pub description: String,
/// What kind of invariant this is.
pub kind: InvariantKind,
}
/// Kind of an [`ActivationInvariant`], with any parameters it carries.
///
/// NOTE(review): this file only stores and (de)serializes these variants; how
/// each one is evaluated is defined elsewhere — confirm semantics there.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum InvariantKind {
AllArtifactsVerified,
EmbedderRevisionMatch,
VectorCountConsistency {
/// Expected total carried by the invariant.
expected_total: u64,
},
CommitContinuity {
/// Previous upper commit bound carried by the invariant.
previous_high: u64,
},
Custom {
/// Name of the custom check.
check_name: String,
},
}
/// Manifest describing one generation: its identity, commit range, build
/// timestamps, embedders, artifacts, repair descriptors and invariants.
///
/// `validate_manifest` checks the structural rules; `compute_manifest_hash`
/// derives the value expected in `manifest_hash`. The struct is serialized as
/// a whole when hashing, so changing or reordering fields presumably changes
/// the hash of existing manifests.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GenerationManifest {
/// Schema version; `0` is an error, values above
/// `MANIFEST_SCHEMA_VERSION` produce a warning.
pub schema_version: u32,
/// Generation identifier; must not be empty.
pub generation_id: String,
/// Hex SHA-256 of the manifest serialized with this field cleared
/// (compared case-insensitively during validation).
pub manifest_hash: String,
/// Commit range of the generation; must not be empty.
pub commit_range: CommitRange,
/// Build start timestamp; must be non-zero.
/// NOTE(review): validation messages call this a Unix timestamp; the unit
/// (seconds vs. milliseconds) is not established in this file.
pub build_started_at: u64,
/// Build completion timestamp; must be non-zero and not earlier than
/// `build_started_at`.
pub build_completed_at: u64,
/// Embedder revisions by key; at least one entry is required.
pub embedders: BTreeMap<String, EmbedderRevision>,
/// Vector artifacts belonging to the generation.
pub vector_artifacts: Vec<VectorArtifact>,
/// Lexical artifacts belonging to the generation.
pub lexical_artifacts: Vec<LexicalArtifact>,
/// Repair descriptors; each must reference one of the artifacts above.
pub repair_descriptors: Vec<RepairDescriptor>,
/// Activation invariants; ids must be non-empty and unique.
pub activation_invariants: Vec<ActivationInvariant>,
/// Total document count; cross-checked against artifact counts.
pub total_documents: u64,
/// Free-form key/value metadata (not validated in this file).
pub metadata: BTreeMap<String, String>,
}
/// Newest manifest schema version this code supports. Validation warns about
/// manifests with a higher `schema_version` and rejects version `0`.
pub const MANIFEST_SCHEMA_VERSION: u32 = 1;
/// Outcome of `validate_manifest`: every finding, in the order the checks
/// produced them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationResult {
/// All findings of any severity.
pub findings: Vec<ValidationFinding>,
}
impl ValidationResult {
#[must_use]
pub fn is_valid(&self) -> bool {
self.findings
.iter()
.all(|f| f.severity != FindingSeverity::Error)
}
#[must_use]
pub fn errors(&self) -> Vec<&ValidationFinding> {
self.findings
.iter()
.filter(|f| f.severity == FindingSeverity::Error)
.collect()
}
#[must_use]
pub fn warnings(&self) -> Vec<&ValidationFinding> {
self.findings
.iter()
.filter(|f| f.severity == FindingSeverity::Warning)
.collect()
}
}
/// A single observation made while validating a manifest.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationFinding {
/// Static name of the check that produced the finding (e.g. `"schema_version"`).
pub check: &'static str,
/// How serious the finding is.
pub severity: FindingSeverity,
/// Human-readable description of the problem.
pub message: String,
}
/// Severity of a [`ValidationFinding`]. Only `Error` makes a
/// `ValidationResult` invalid.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FindingSeverity {
/// Informational; no check in this file currently emits it.
Info,
/// Suspicious but does not invalidate the manifest.
Warning,
/// Invalidates the manifest.
Error,
}
#[must_use]
pub fn validate_manifest(manifest: &GenerationManifest) -> ValidationResult {
let mut findings = Vec::new();
check_schema_version(manifest, &mut findings);
check_generation_id(manifest, &mut findings);
check_manifest_hash(manifest, &mut findings);
check_commit_range(manifest, &mut findings);
check_timestamps(manifest, &mut findings);
check_embedders(manifest, &mut findings);
check_vector_artifacts(manifest, &mut findings);
check_lexical_artifacts(manifest, &mut findings);
check_repair_descriptors(manifest, &mut findings);
check_activation_invariants(manifest, &mut findings);
check_document_count_consistency(manifest, &mut findings);
ValidationResult { findings }
}
pub fn compute_manifest_hash(manifest: &GenerationManifest) -> crate::SearchResult<String> {
let mut canonical = manifest.clone();
canonical.manifest_hash.clear();
let serialized =
serde_json::to_vec(&canonical).map_err(|source| SearchError::SubsystemError {
subsystem: "generation_manifest",
source: Box::new(source),
})?;
Ok(lower_hex(Sha256::digest(serialized)))
}
/// Renders `bytes` as lowercase hexadecimal, two digits per byte.
fn lower_hex(bytes: impl AsRef<[u8]>) -> String {
    let raw = bytes.as_ref();
    raw.iter()
        .fold(String::with_capacity(raw.len() * 2), |mut out, byte| {
            // The formatting result is deliberately ignored, as before.
            let _ = write!(out, "{byte:02x}");
            out
        })
}
/// Converts a validation result into a `SearchResult`.
///
/// # Errors
///
/// When `result` contains at least one error-severity finding, returns
/// `SearchError::InvalidConfig` for field `"generation_manifest"` whose
/// `reason` is the error messages joined with `"; "`. Warnings are ignored.
pub fn require_valid(result: &ValidationResult) -> crate::SearchResult<()> {
    let errors = result.errors();
    // No error-severity findings is exactly what `is_valid` reports.
    if errors.is_empty() {
        return Ok(());
    }
    let reason = errors
        .iter()
        .map(|finding| finding.message.as_str())
        .collect::<Vec<_>>()
        .join("; ");
    Err(SearchError::InvalidConfig {
        field: "generation_manifest".into(),
        value: String::new(),
        reason,
    })
}
/// Checks `schema_version`: `0` is an error, anything newer than
/// `MANIFEST_SCHEMA_VERSION` is a warning, everything else passes silently.
fn check_schema_version(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    match m.schema_version {
        0 => f.push(ValidationFinding {
            check: "schema_version",
            severity: FindingSeverity::Error,
            message: "schema_version must be >= 1".into(),
        }),
        version if version > MANIFEST_SCHEMA_VERSION => f.push(ValidationFinding {
            check: "schema_version",
            severity: FindingSeverity::Warning,
            message: format!(
                "schema_version {} is newer than supported {}; forward-compat may lose fields",
                version, MANIFEST_SCHEMA_VERSION
            ),
        }),
        _ => {}
    }
}
/// Reports an error when `generation_id` is the empty string.
fn check_generation_id(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    if !m.generation_id.is_empty() {
        return;
    }
    let finding = ValidationFinding {
        check: "generation_id",
        severity: FindingSeverity::Error,
        message: "generation_id must not be empty".into(),
    };
    f.push(finding);
}
/// Validates `manifest_hash`.
///
/// At most one error is reported, for the first rule that fails:
/// 1. the hash must not be empty;
/// 2. it must be 64 hex characters;
/// 3. it must equal the recomputed canonical hash, ignoring ASCII case.
///
/// A failure to recompute the hash is reported as an error as well.
fn check_manifest_hash(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    let stored = &m.manifest_hash;
    let problem: Option<String> = if stored.is_empty() {
        Some("manifest_hash must not be empty".into())
    } else if !is_valid_sha256_hex(stored) {
        Some("manifest_hash must be 64 lowercase/uppercase hex chars".into())
    } else {
        match compute_manifest_hash(m) {
            Ok(expected) if stored.eq_ignore_ascii_case(&expected) => None,
            Ok(expected) => Some(format!(
                "manifest_hash does not match canonical manifest body (expected {expected})"
            )),
            Err(err) => Some(format!("failed to recompute manifest_hash: {err}")),
        }
    };

    if let Some(message) = problem {
        f.push(ValidationFinding {
            check: "manifest_hash",
            severity: FindingSeverity::Error,
            message,
        });
    }
}
/// Returns `true` when `value` is exactly 64 ASCII hex digits (either case).
fn is_valid_sha256_hex(value: &str) -> bool {
    let raw = value.as_bytes();
    // Bytes of multi-byte UTF-8 characters are never ASCII hex digits, so a
    // byte-wise scan accepts exactly the same strings as a char-wise one.
    raw.len() == 64 && raw.iter().all(u8::is_ascii_hexdigit)
}
/// Reports an error when the manifest's commit range is empty (`high < low`).
fn check_commit_range(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    let range = &m.commit_range;
    if !range.is_empty() {
        return;
    }
    let message = format!(
        "commit_range is invalid: high ({}) < low ({})",
        range.high, range.low
    );
    f.push(ValidationFinding {
        check: "commit_range",
        severity: FindingSeverity::Error,
        message,
    });
}
/// Validates the build timestamps.
///
/// Each timestamp that is `0` yields its own error; independently, a
/// completion time earlier than the start time yields a `build_timestamps`
/// error.
fn check_timestamps(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    let (started, completed) = (m.build_started_at, m.build_completed_at);

    let must_be_set = [
        (
            "build_started_at",
            started,
            "build_started_at must be a positive Unix timestamp",
        ),
        (
            "build_completed_at",
            completed,
            "build_completed_at must be a positive Unix timestamp",
        ),
    ];
    for (check, value, message) in must_be_set {
        if value == 0 {
            f.push(ValidationFinding {
                check,
                severity: FindingSeverity::Error,
                message: message.into(),
            });
        }
    }

    if completed >= started {
        return;
    }
    f.push(ValidationFinding {
        check: "build_timestamps",
        severity: FindingSeverity::Error,
        message: format!(
            "build_completed_at ({}) is before build_started_at ({})",
            completed, started
        ),
    });
}
/// Validates the embedder table.
///
/// An empty table is an error. For every entry, an empty `model_name`, an
/// empty `weights_hash` and a zero `dimension` are each reported as separate
/// errors, in that order.
fn check_embedders(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    // Every finding produced here has error severity.
    let mut error = |check: &'static str, message: String| {
        f.push(ValidationFinding {
            check,
            severity: FindingSeverity::Error,
            message,
        });
    };

    if m.embedders.is_empty() {
        error(
            "embedders",
            "at least one embedder revision must be specified".into(),
        );
    }

    for (key, rev) in m.embedders.iter() {
        if rev.model_name.is_empty() {
            error(
                "embedder_model_name",
                format!("embedder '{key}' has empty model_name"),
            );
        }
        if rev.weights_hash.is_empty() {
            error(
                "embedder_weights_hash",
                format!("embedder '{key}' has empty weights_hash"),
            );
        }
        if rev.dimension == 0 {
            error(
                "embedder_dimension",
                format!("embedder '{key}' has dimension 0"),
            );
        }
    }
}
/// Validates the vector artifact list.
///
/// First pass: per artifact, an empty `path`, an empty `checksum` and a zero
/// `dimension` are each reported as errors. Second pass: every artifact whose
/// `path` already appeared earlier in the list is reported as a duplicate.
/// The per-artifact findings always precede the duplicate findings.
fn check_vector_artifacts(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    // Every finding produced here has error severity.
    let mut error = |check: &'static str, message: String| {
        f.push(ValidationFinding {
            check,
            severity: FindingSeverity::Error,
            message,
        });
    };

    for (i, art) in m.vector_artifacts.iter().enumerate() {
        if art.path.is_empty() {
            error(
                "vector_artifact_path",
                format!("vector_artifacts[{i}] has empty path"),
            );
        }
        if art.checksum.is_empty() {
            error(
                "vector_artifact_checksum",
                format!("vector_artifacts[{i}] '{}' has empty checksum", art.path),
            );
        }
        if art.dimension == 0 {
            error(
                "vector_artifact_dimension",
                format!("vector_artifacts[{i}] '{}' has dimension 0", art.path),
            );
        }
    }

    let mut seen_paths = std::collections::HashSet::new();
    for art in &m.vector_artifacts {
        let first_occurrence = seen_paths.insert(art.path.as_str());
        if first_occurrence {
            continue;
        }
        error(
            "vector_artifact_duplicate",
            format!("duplicate vector artifact path: '{}'", art.path),
        );
    }
}
/// Validates the lexical artifact list.
///
/// First pass: per artifact, an empty `path` and an empty `checksum` are each
/// reported as errors. Second pass: every artifact whose `path` already
/// appeared earlier in the list is reported as a duplicate.
fn check_lexical_artifacts(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    // Every finding produced here has error severity.
    let mut error = |check: &'static str, message: String| {
        f.push(ValidationFinding {
            check,
            severity: FindingSeverity::Error,
            message,
        });
    };

    for (i, art) in m.lexical_artifacts.iter().enumerate() {
        if art.path.is_empty() {
            error(
                "lexical_artifact_path",
                format!("lexical_artifacts[{i}] has empty path"),
            );
        }
        if art.checksum.is_empty() {
            error(
                "lexical_artifact_checksum",
                format!("lexical_artifacts[{i}] '{}' has empty checksum", art.path),
            );
        }
    }

    let mut seen_paths = std::collections::HashSet::new();
    for art in &m.lexical_artifacts {
        let first_occurrence = seen_paths.insert(art.path.as_str());
        if first_occurrence {
            continue;
        }
        error(
            "lexical_artifact_duplicate",
            format!("duplicate lexical artifact path: '{}'", art.path),
        );
    }
}
fn check_repair_descriptors(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
let all_artifact_paths: std::collections::HashSet<&str> = m
.vector_artifacts
.iter()
.map(|a| a.path.as_str())
.chain(m.lexical_artifacts.iter().map(|a| a.path.as_str()))
.collect();
for (i, rd) in m.repair_descriptors.iter().enumerate() {
if !all_artifact_paths.contains(rd.protected_artifact.as_str()) {
f.push(ValidationFinding {
check: "repair_descriptor_target",
severity: FindingSeverity::Error,
message: format!(
"repair_descriptors[{i}] references unknown artifact '{}'",
rd.protected_artifact
),
});
}
if rd.source_symbols == 0 {
f.push(ValidationFinding {
check: "repair_descriptor_symbols",
severity: FindingSeverity::Error,
message: format!(
"repair_descriptors[{i}] for '{}' has 0 source symbols",
rd.protected_artifact
),
});
}
if rd.overhead_ratio.is_nan() || rd.overhead_ratio < 0.0 || rd.overhead_ratio > 10.0 {
f.push(ValidationFinding {
check: "repair_descriptor_overhead",
severity: FindingSeverity::Warning,
message: format!(
"repair_descriptors[{i}] overhead ratio {} is outside expected range [0, 10]",
rd.overhead_ratio
),
});
}
}
}
/// Validates activation invariant ids.
///
/// For each invariant, an empty `id` is an error, and an `id` that already
/// appeared earlier in the list is a duplicate error. An empty id still
/// counts as "seen", so a second empty id yields both errors.
fn check_activation_invariants(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    let mut known_ids = std::collections::HashSet::new();
    for inv in m.activation_invariants.iter() {
        let id = inv.id.as_str();
        if id.is_empty() {
            f.push(ValidationFinding {
                check: "invariant_id",
                severity: FindingSeverity::Error,
                message: "activation invariant has empty id".into(),
            });
        }
        let first_occurrence = known_ids.insert(id);
        if first_occurrence {
            continue;
        }
        f.push(ValidationFinding {
            check: "invariant_duplicate",
            severity: FindingSeverity::Error,
            message: format!("duplicate activation invariant id: '{}'", id),
        });
    }
}
/// Cross-checks `total_documents` against the per-artifact counts.
///
/// - Error when `total_documents` is `0` although artifacts are present.
/// - Warning when lexical artifacts exist and their summed `document_count`
///   differs from `total_documents`.
/// - Warning when vector artifacts exist and their summed `vector_count`
///   equals neither `total_documents` nor twice that value (two-tier).
///
/// All arithmetic is done in `u128`. Manifest values are arbitrary `u64`s
/// (they may come from a corrupt or hostile file), so summing them or
/// doubling `total_documents` in `u64` could overflow, which would panic in
/// debug builds and compare against a wrapped value in release builds.
fn check_document_count_consistency(m: &GenerationManifest, f: &mut Vec<ValidationFinding>) {
    let vector_total: u128 = m
        .vector_artifacts
        .iter()
        .map(|a| u128::from(a.vector_count))
        .sum();
    let lexical_total: u128 = m
        .lexical_artifacts
        .iter()
        .map(|a| u128::from(a.document_count))
        .sum();
    let total_documents = u128::from(m.total_documents);

    let has_vectors = !m.vector_artifacts.is_empty();
    let has_lexical = !m.lexical_artifacts.is_empty();

    if m.total_documents == 0 && (has_vectors || has_lexical) {
        f.push(ValidationFinding {
            check: "total_documents",
            severity: FindingSeverity::Error,
            message: "total_documents is 0 but artifacts are present".into(),
        });
    }

    if has_lexical && lexical_total != total_documents {
        f.push(ValidationFinding {
            check: "lexical_document_count",
            severity: FindingSeverity::Warning,
            message: format!(
                "lexical document count ({lexical_total}) != total_documents ({})",
                m.total_documents
            ),
        });
    }

    // `total_documents * 2` is at most 2^65 and therefore always fits in u128.
    if has_vectors && vector_total != total_documents && vector_total != total_documents * 2 {
        f.push(ValidationFinding {
            check: "vector_count_consistency",
            severity: FindingSeverity::Warning,
            message: format!(
                "vector count ({vector_total}) doesn't match total_documents ({}) or 2x (two-tier)",
                m.total_documents
            ),
        });
    }
}
// Unit tests for manifest validation, hashing and (de)serialization.
//
// Many negative tests mutate a valid manifest WITHOUT refreshing
// `manifest_hash`. Those manifests therefore also carry a hash-mismatch error;
// the tests still assert on the specific check they target. Tests that assert
// `is_valid()` after a mutation call `refresh_manifest_hash` first.
#[cfg(test)]
mod tests {
use super::*;
// Embedder revision that satisfies every embedder check.
fn sample_embedder() -> EmbedderRevision {
EmbedderRevision {
model_name: "potion-128M".into(),
weights_hash: "abcdef1234567890".into(),
dimension: 256,
quantization: QuantizationFormat::F16,
}
}
// Vector artifact with the given path and vector count; other fields are valid placeholders.
fn sample_vector_artifact(path: &str, count: u64) -> VectorArtifact {
VectorArtifact {
path: path.into(),
size_bytes: 1024,
checksum: "deadbeef".into(),
vector_count: count,
dimension: 256,
embedder_tier: EmbedderTierTag::Fast,
}
}
// Lexical artifact with the given path and document count; other fields are valid placeholders.
fn sample_lexical_artifact(path: &str, count: u64) -> LexicalArtifact {
LexicalArtifact {
path: path.into(),
size_bytes: 2048,
checksum: "cafebabe".into(),
document_count: count,
}
}
// Fully valid manifest (100 documents, one vector and one lexical artifact)
// whose `manifest_hash` matches its contents.
fn valid_manifest() -> GenerationManifest {
let mut embedders = BTreeMap::new();
embedders.insert("fast".into(), sample_embedder());
let mut manifest = GenerationManifest {
schema_version: MANIFEST_SCHEMA_VERSION,
generation_id: "gen-001".into(),
manifest_hash: String::new(),
commit_range: CommitRange { low: 1, high: 100 },
build_started_at: 1_700_000_000_000,
build_completed_at: 1_700_000_060_000,
embedders,
vector_artifacts: vec![sample_vector_artifact("vectors/shard_0.fsvi", 100)],
lexical_artifacts: vec![sample_lexical_artifact("lexical/segment_0", 100)],
repair_descriptors: vec![RepairDescriptor {
protected_artifact: "vectors/shard_0.fsvi".into(),
sidecar_path: "vectors/shard_0.fsvi.fec".into(),
source_symbols: 64,
repair_symbols: 13,
overhead_ratio: 0.2,
}],
activation_invariants: vec![
ActivationInvariant {
id: "all_artifacts".into(),
description: "All artifacts verified".into(),
kind: InvariantKind::AllArtifactsVerified,
},
ActivationInvariant {
id: "embedder_match".into(),
description: "Embedder revision matches runtime".into(),
kind: InvariantKind::EmbedderRevisionMatch,
},
],
total_documents: 100,
metadata: BTreeMap::new(),
};
// The hash is computed last so it covers every field set above.
manifest.manifest_hash = compute_manifest_hash(&manifest).expect("hash");
manifest
}
// Recomputes `manifest_hash` after a test has mutated the manifest.
fn refresh_manifest_hash(manifest: &mut GenerationManifest) {
manifest.manifest_hash = compute_manifest_hash(manifest).expect("hash");
}
// Baseline: the sample manifest produces no errors.
#[test]
fn valid_manifest_passes() {
let m = valid_manifest();
let r = validate_manifest(&m);
assert!(r.is_valid(), "findings: {:#?}", r.findings);
assert!(r.errors().is_empty());
}
#[test]
fn schema_version_zero_is_error() {
let mut m = valid_manifest();
m.schema_version = 0;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "schema_version"));
}
// A newer schema version only warns, so the manifest stays valid once the
// hash has been refreshed.
#[test]
fn future_schema_version_is_warning() {
let mut m = valid_manifest();
m.schema_version = MANIFEST_SCHEMA_VERSION + 1;
refresh_manifest_hash(&mut m);
let r = validate_manifest(&m);
assert!(r.is_valid());
assert!(!r.warnings().is_empty());
}
#[test]
fn empty_generation_id_is_error() {
let mut m = valid_manifest();
m.generation_id = String::new();
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "generation_id"));
}
#[test]
fn empty_manifest_hash_is_error() {
let mut m = valid_manifest();
m.manifest_hash.clear();
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "manifest_hash"));
}
// A value that is not 64 hex characters is rejected before any recomputation.
#[test]
fn malformed_manifest_hash_is_error() {
let mut m = valid_manifest();
m.manifest_hash = "not-a-sha256".into();
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "manifest_hash"));
}
// A well-formed hash that does not match the manifest body is rejected.
#[test]
fn mismatched_manifest_hash_is_error() {
let mut m = valid_manifest();
m.manifest_hash = "0".repeat(64);
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(
r.errors()
.iter()
.any(|f| f.check == "manifest_hash"
&& f.message.contains("does not match canonical"))
);
}
// The stored hash is excluded from the hashed body, so changing only its
// case leaves the expected hash unchanged.
#[test]
fn manifest_hash_match_is_case_insensitive() {
let mut m = valid_manifest();
m.manifest_hash = m.manifest_hash.to_uppercase();
let r = validate_manifest(&m);
assert!(r.is_valid());
}
#[test]
fn invalid_commit_range_is_error() {
let mut m = valid_manifest();
m.commit_range = CommitRange { low: 50, high: 10 };
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "commit_range"));
}
#[test]
fn zero_timestamps_are_errors() {
let mut m = valid_manifest();
m.build_started_at = 0;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "build_started_at"));
}
#[test]
fn completed_before_started_is_error() {
let mut m = valid_manifest();
m.build_completed_at = m.build_started_at - 1;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "build_timestamps"));
}
#[test]
fn no_embedders_is_error() {
let mut m = valid_manifest();
m.embedders.clear();
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "embedders"));
}
// Each invalid embedder field is reported under its own check name.
#[test]
fn embedder_empty_fields_are_errors() {
let mut m = valid_manifest();
m.embedders.insert(
"bad".into(),
EmbedderRevision {
model_name: String::new(),
weights_hash: String::new(),
dimension: 0,
quantization: QuantizationFormat::F16,
},
);
let r = validate_manifest(&m);
assert!(!r.is_valid());
let errors = r.errors();
assert!(errors.iter().any(|f| f.check == "embedder_model_name"));
assert!(errors.iter().any(|f| f.check == "embedder_weights_hash"));
assert!(errors.iter().any(|f| f.check == "embedder_dimension"));
}
#[test]
fn duplicate_vector_artifact_paths_is_error() {
let mut m = valid_manifest();
m.vector_artifacts
.push(sample_vector_artifact("vectors/shard_0.fsvi", 100));
m.total_documents = 200;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(
r.errors()
.iter()
.any(|f| f.check == "vector_artifact_duplicate")
);
}
#[test]
fn empty_artifact_path_is_error() {
let mut m = valid_manifest();
m.vector_artifacts.push(sample_vector_artifact("", 10));
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "vector_artifact_path"));
}
#[test]
fn empty_artifact_checksum_is_error() {
let mut m = valid_manifest();
m.vector_artifacts[0].checksum = String::new();
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(
r.errors()
.iter()
.any(|f| f.check == "vector_artifact_checksum")
);
}
// A descriptor must point at a path listed among the manifest's artifacts.
#[test]
fn repair_descriptor_unknown_artifact_is_error() {
let mut m = valid_manifest();
m.repair_descriptors.push(RepairDescriptor {
protected_artifact: "nonexistent.fsvi".into(),
sidecar_path: "nonexistent.fsvi.fec".into(),
source_symbols: 10,
repair_symbols: 2,
overhead_ratio: 0.2,
});
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(
r.errors()
.iter()
.any(|f| f.check == "repair_descriptor_target")
);
}
#[test]
fn repair_descriptor_zero_source_symbols_is_error() {
let mut m = valid_manifest();
m.repair_descriptors[0].source_symbols = 0;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(
r.errors()
.iter()
.any(|f| f.check == "repair_descriptor_symbols")
);
}
// An overhead ratio above 10 only warns; the manifest stays valid.
#[test]
fn extreme_repair_overhead_is_warning() {
let mut m = valid_manifest();
m.repair_descriptors[0].overhead_ratio = 15.0;
refresh_manifest_hash(&mut m);
let r = validate_manifest(&m);
assert!(r.is_valid());
assert!(
r.warnings()
.iter()
.any(|f| f.check == "repair_descriptor_overhead")
);
}
#[test]
fn duplicate_invariant_id_is_error() {
let mut m = valid_manifest();
m.activation_invariants
.push(m.activation_invariants[0].clone());
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "invariant_duplicate"));
}
#[test]
fn zero_total_documents_with_artifacts_is_error() {
let mut m = valid_manifest();
m.total_documents = 0;
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().iter().any(|f| f.check == "total_documents"));
}
// A lexical count mismatch only warns; the manifest stays valid.
#[test]
fn lexical_count_mismatch_is_warning() {
let mut m = valid_manifest();
m.lexical_artifacts[0].document_count = 50; refresh_manifest_hash(&mut m);
let r = validate_manifest(&m);
assert!(r.is_valid());
assert!(
r.warnings()
.iter()
.any(|f| f.check == "lexical_document_count")
);
}
// Two vector artifacts of 100 vectors each (2x total_documents) are accepted.
// The repair descriptor is re-pointed because the original artifact path is gone.
#[test]
fn two_tier_vector_count_accepted() {
let mut m = valid_manifest();
m.vector_artifacts = vec![
sample_vector_artifact("vectors/fast.fsvi", 100),
sample_vector_artifact("vectors/quality.fsvi", 100),
];
m.vector_artifacts[1].embedder_tier = EmbedderTierTag::Quality;
m.repair_descriptors[0].protected_artifact = "vectors/fast.fsvi".into();
refresh_manifest_hash(&mut m);
let r = validate_manifest(&m);
assert!(r.is_valid(), "findings: {:#?}", r.findings);
}
// JSON serialization followed by deserialization yields an equal manifest.
#[test]
fn serde_roundtrip() {
let m = valid_manifest();
let json = serde_json::to_string_pretty(&m).expect("serialize");
let deserialized: GenerationManifest = serde_json::from_str(&json).expect("deserialize");
assert_eq!(m, deserialized);
}
#[test]
fn commit_range_len_and_empty() {
let range = CommitRange { low: 5, high: 10 };
assert_eq!(range.len(), 6);
assert!(!range.is_empty());
let empty = CommitRange { low: 10, high: 5 };
assert!(empty.is_empty());
}
// low == high is a one-commit range, not an empty one.
#[test]
fn single_commit_range() {
let range = CommitRange { low: 42, high: 42 };
assert_eq!(range.len(), 1);
assert!(!range.is_empty());
}
#[test]
fn require_valid_passes_for_valid_manifest() {
let m = valid_manifest();
let r = validate_manifest(&m);
assert!(require_valid(&r).is_ok());
}
#[test]
fn require_valid_fails_for_invalid_manifest() {
let mut m = valid_manifest();
m.generation_id = String::new();
let r = validate_manifest(&m);
let err = require_valid(&r).unwrap_err();
assert!(matches!(err, SearchError::InvalidConfig { .. }));
}
// Validation does not stop at the first problem: a manifest that is wrong
// in many ways reports many errors at once.
#[test]
fn empty_manifest_collects_multiple_errors() {
let m = GenerationManifest {
schema_version: 0,
generation_id: String::new(),
manifest_hash: String::new(),
commit_range: CommitRange { low: 10, high: 5 },
build_started_at: 0,
build_completed_at: 0,
embedders: BTreeMap::new(),
vector_artifacts: vec![],
lexical_artifacts: vec![],
repair_descriptors: vec![],
activation_invariants: vec![],
total_documents: 0,
metadata: BTreeMap::new(),
};
let r = validate_manifest(&m);
assert!(!r.is_valid());
assert!(r.errors().len() >= 5, "found {} errors", r.errors().len());
}
// Metadata entries survive a JSON round trip.
#[test]
fn metadata_is_preserved() {
let mut m = valid_manifest();
m.metadata.insert("build_host".into(), "node-7".into());
m.metadata.insert("deployment".into(), "production".into());
let json = serde_json::to_string(&m).expect("serialize");
let deserialized: GenerationManifest = serde_json::from_str(&json).expect("deserialize");
assert_eq!(deserialized.metadata.get("build_host").unwrap(), "node-7");
}
// Every `InvariantKind` variant, with and without payload, round-trips through JSON.
#[test]
fn invariant_kinds_serialize() {
let kinds = vec![
InvariantKind::AllArtifactsVerified,
InvariantKind::EmbedderRevisionMatch,
InvariantKind::VectorCountConsistency {
expected_total: 500,
},
InvariantKind::CommitContinuity { previous_high: 99 },
InvariantKind::Custom {
check_name: "custom_check".into(),
},
];
for kind in &kinds {
let json = serde_json::to_string(kind).expect("serialize");
let back: InvariantKind = serde_json::from_str(&json).expect("deserialize");
assert_eq!(kind, &back);
}
}
// Every `QuantizationFormat` variant round-trips through JSON.
#[test]
fn quantization_format_serialize() {
for fmt in &[
QuantizationFormat::F32,
QuantizationFormat::F16,
QuantizationFormat::Int8,
QuantizationFormat::Int4,
] {
let json = serde_json::to_string(fmt).expect("serialize");
let back: QuantizationFormat = serde_json::from_str(&json).expect("deserialize");
assert_eq!(fmt, &back);
}
}
}