use std::path::{Path, PathBuf};
/// One file entry inside a model [`Manifest`].
///
/// The type is (de)serialized with serde. No field carries `#[serde(default)]`,
/// so every field must be present in the input when deserializing.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct ManifestFile {
/// File name; the built-in manifest uses bare names such as `tokenizer.json`.
pub name: String,
/// Size of the file in bytes.
/// NOTE(review): presumably the expected download size; how it is checked is
/// not visible in this file.
pub size_bytes: u64,
/// SHA-256 digest as text; the built-in manifest uses 64 lowercase hex digits.
/// NOTE(review): presumably used for integrity verification — confirm against
/// the downloader.
pub sha256: String,
/// URL of the file.
/// NOTE(review): presumably the first location tried — confirm against the
/// downloader.
pub primary_url: String,
/// Additional URLs for the same file (empty in the built-in manifest).
/// NOTE(review): presumably tried when `primary_url` fails — confirm.
pub fallback_urls: Vec<String>,
}
/// Manifest describing a single model and the files that belong to it.
///
/// `model_name`, `version`, `chunk_count` and `files` are required when
/// deserializing. `model_id`, `label_space_version` and `default` are marked
/// `#[serde(default)]`, so documents written without them still deserialize.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct Manifest {
/// Name of the model (e.g. `privacy-filter` in the built-in manifest).
pub model_name: String,
/// Version string of this manifest entry.
pub version: String,
/// Chunk count.
/// NOTE(review): what is being chunked is not visible in this file — confirm
/// against the consumer of this field.
pub chunk_count: u32,
/// Files that belong to this model.
pub files: Vec<ManifestFile>,
/// Identifier that `MultiModelManifest::select` compares against when a
/// caller asks for a specific model. Empty string when absent from the input.
#[serde(default)]
pub model_id: String,
/// Version tag of the label space (e.g. `8class-v1` in the built-in
/// manifest). Empty string when absent from the input.
#[serde(default)]
pub label_space_version: String,
/// Marks this manifest as the default choice for
/// `MultiModelManifest::select` when no model id is requested.
/// `false` when absent from the input.
#[serde(default)]
pub default: bool,
}
/// Top-level manifest document that lists every selectable model.
///
/// Serialized as a JSON object with a single `models` array.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct MultiModelManifest {
    /// All model manifests, in document order.
    pub models: Vec<Manifest>,
}

impl MultiModelManifest {
    /// Parses `json` in either the multi-model schema or the legacy
    /// single-manifest schema.
    ///
    /// The multi-model schema (`{"models": [...]}`) is tried first. If that
    /// fails, `json` is parsed as a single [`Manifest`], which is then wrapped
    /// in a one-element `models` list with `default` forced to `true`.
    ///
    /// # Errors
    ///
    /// Returns a `serde_json::Error` when `json` matches neither schema. The
    /// error reported is the one for the schema the document appears to use:
    /// if the document has a top-level `models` key, the multi-model parse
    /// error is returned (so a broken entry inside `models` is reported as
    /// such); otherwise the single-manifest parse error is returned.
    pub fn deserialize_compat(json: &str) -> Result<Self, serde_json::Error> {
        let multi_err = match serde_json::from_str::<MultiModelManifest>(json) {
            Ok(multi) => return Ok(multi),
            Err(err) => err,
        };

        match serde_json::from_str::<Manifest>(json) {
            Ok(single) => Ok(MultiModelManifest {
                // A lone manifest is necessarily the default one.
                models: vec![Manifest {
                    default: true,
                    ..single
                }],
            }),
            Err(single_err) => {
                // Both schemas failed. Only on this error path, inspect the raw
                // document to decide which diagnostic is the relevant one.
                let has_models_key = serde_json::from_str::<serde_json::Value>(json)
                    .map(|doc| doc.get("models").is_some())
                    .unwrap_or(false);
                Err(if has_models_key { multi_err } else { single_err })
            }
        }
    }

    /// Picks a manifest from `models`.
    ///
    /// * `Some(id)` with a non-empty `id`: returns the first manifest whose
    ///   `model_id` equals `id`, or `None` when there is no such manifest.
    /// * `None` or `Some("")`: returns the first manifest flagged `default`,
    ///   falling back to the first manifest in the list; `None` only when
    ///   `models` is empty.
    pub fn select(&self, model_id: Option<&str>) -> Option<&Manifest> {
        match model_id {
            Some(id) if !id.is_empty() => self.models.iter().find(|m| m.model_id == id),
            _ => self
                .models
                .iter()
                .find(|m| m.default)
                .or_else(|| self.models.first()),
        }
    }
}
/// Filesystem locations of the files that make up one model.
#[derive(Debug, Clone)]
pub struct ModelPaths {
    /// Path of the ONNX model file.
    pub onnx: PathBuf,
    /// Path of the tokenizer file.
    pub tokenizer: PathBuf,
    /// Path of the config file.
    pub config: PathBuf,
}

impl ModelPaths {
    /// Returns the directory component of the `onnx` path.
    ///
    /// # Panics
    ///
    /// Panics when `onnx` has no parent (an empty path or a filesystem root).
    /// The panic message states that `ensure_model_available` guarantees a
    /// parent exists; that function is not visible in this part of the file.
    pub fn dir(&self) -> &Path {
        let containing_dir: Option<&Path> = self.onnx.as_path().parent();
        containing_dir.expect("ModelPaths.onnx 必有父目录(由 ensure_model_available 保证)")
    }
}
/// Builds the manifest that is compiled into the binary for the
/// `privacy-filter` model.
///
/// The result is flagged `default`, carries the model id
/// `openai-privacy-filter-v1` and label space `8class-v1`, and lists four
/// files (two ONNX files, the tokenizer and the config), none of which has
/// fallback URLs.
pub fn placeholder_manifest() -> Manifest {
    // Every entry shares the same shape and has no fallback URLs, so only the
    // fields that vary are passed in.
    let entry = |name: &str, size_bytes: u64, sha256: &str, primary_url: &str| ManifestFile {
        name: String::from(name),
        size_bytes,
        sha256: String::from(sha256),
        primary_url: String::from(primary_url),
        fallback_urls: Vec::new(),
    };

    let files = vec![
        entry(
            "model_q4f16.onnx",
            165744,
            "eaae4e83cf1345a60abe333ed882b55fe5775d1dfbf34b9b269e5e5416f45e5b",
            "https://huggingface.co/openai/privacy-filter/resolve/main/onnx/model_q4f16.onnx",
        ),
        entry(
            "model_q4f16.onnx_data",
            809061992,
            "6d4dde787e03ace283c45d4e32a94eec32b6cfcc242e7219bea96f5b4c13569d",
            "https://huggingface.co/openai/privacy-filter/resolve/main/onnx/model_q4f16.onnx_data",
        ),
        entry(
            "tokenizer.json",
            27868174,
            "0614fe83cadab421296e664e1f48f4261fa8fef6e03e63bb75c20f38e37d07d3",
            "https://huggingface.co/openai/privacy-filter/resolve/main/tokenizer.json",
        ),
        entry(
            "config.json",
            3039,
            "b2b26a4a4a000639ad30b0c264adbefe365bdb567fbd7bb27303b8c438375bd1",
            "https://huggingface.co/openai/privacy-filter/resolve/main/config.json",
        ),
    ];

    Manifest {
        model_name: String::from("privacy-filter"),
        version: String::from("0.5.1"),
        chunk_count: 16,
        files,
        model_id: String::from("openai-privacy-filter-v1"),
        label_space_version: String::from("8class-v1"),
        default: true,
    }
}
// Tests intentionally use `unwrap`/`expect`/`panic` to fail fast, hence the
// lint allowances below.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests_v07_alpha3 {
    use super::*;

    /// Document in the legacy single-manifest schema: no `model_id`,
    /// `label_space_version` or `default` fields.
    const LEGACY_JSON: &str = r#"{
        "model_name": "privacy-filter",
        "version": "0.5.1",
        "chunk_count": 16,
        "files": []
    }"#;

    /// Builds a minimal manifest fixture; only the fields the selection tests
    /// care about vary.
    fn fixture(model_name: &str, model_id: &str, default: bool) -> Manifest {
        Manifest {
            model_name: model_name.to_string(),
            version: "1".to_string(),
            chunk_count: 16,
            files: vec![],
            model_id: model_id.to_string(),
            label_space_version: "v1".to_string(),
            default,
        }
    }

    /// A legacy document still deserializes into `Manifest`, with the missing
    /// fields taking their serde defaults.
    #[test]
    fn legacy_single_manifest_deserialize_compatible() {
        let parsed = serde_json::from_str::<Manifest>(LEGACY_JSON)
            .expect("老 schema(无三层 pin 字段)应能正常 deser");

        assert_eq!(parsed.model_name, "privacy-filter");
        assert_eq!(parsed.model_id, "", "缺字段应走 serde default 空串");
        assert_eq!(parsed.label_space_version, "");
        assert!(!parsed.default, "缺字段应 default = false");
    }

    /// A document with a top-level `models` array is parsed as-is.
    #[test]
    fn new_schema_multi_model_deserialize() {
        let document = r#"{
            "models": [
                {
                    "model_name": "privacy-filter",
                    "version": "1.0.0",
                    "chunk_count": 16,
                    "model_id": "openai-privacy-filter-v1",
                    "label_space_version": "8class-v1",
                    "default": true,
                    "files": []
                },
                {
                    "model_name": "xlm-r-pii",
                    "version": "1.0.0",
                    "chunk_count": 16,
                    "model_id": "xlm-r-pii-v1",
                    "label_space_version": "8class-v1",
                    "default": false,
                    "files": []
                }
            ]
        }"#;

        let parsed = MultiModelManifest::deserialize_compat(document)
            .expect("新 schema 顶层 models array 应能 deser");

        assert_eq!(parsed.models.len(), 2);
        let (first, second) = (&parsed.models[0], &parsed.models[1]);
        assert_eq!(first.model_id, "openai-privacy-filter-v1");
        assert!(first.default);
        assert!(!second.default);
    }

    /// A legacy document goes through the compat path and comes out as a
    /// one-element list whose only entry is the default.
    #[test]
    fn legacy_schema_via_compat_wraps_to_single_array_with_default() {
        let wrapped = MultiModelManifest::deserialize_compat(LEGACY_JSON)
            .expect("老 schema 通过 compat 应包成单元素 array");

        assert_eq!(wrapped.models.len(), 1, "老 schema 应包成单元素 array");
        let only = &wrapped.models[0];
        assert!(
            only.default,
            "compat 路径强制 default = true(单元素必为默认)"
        );
        assert_eq!(only.model_name, "privacy-filter");
    }

    /// An explicit, non-empty id selects the matching manifest or nothing.
    #[test]
    fn select_by_explicit_id_finds_or_none() {
        let multi = MultiModelManifest {
            models: vec![fixture("a", "id-a", false), fixture("b", "id-b", true)],
        };
        let name_for = |id: &str| multi.select(Some(id)).map(|m| m.model_name.as_str());

        assert_eq!(name_for("id-a"), Some("a"));
        assert_eq!(name_for("id-b"), Some("b"));
        assert_eq!(name_for("id-nonexistent"), None);
    }

    /// Without an id (or with an empty one) the default manifest wins, and the
    /// first manifest is used when none is flagged default.
    #[test]
    fn select_default_or_first() {
        let mut multi = MultiModelManifest {
            models: vec![
                fixture("first", "id-first", false),
                fixture("default", "id-default", true),
            ],
        };
        assert_eq!(
            multi.select(None).map(|m| m.model_name.as_str()),
            Some("default")
        );

        // With no manifest flagged default, selection falls back to the first.
        multi.models[1].default = false;
        assert_eq!(
            multi.select(None).map(|m| m.model_name.as_str()),
            Some("first")
        );
        assert_eq!(
            multi.select(Some("")).map(|m| m.model_name.as_str()),
            Some("first")
        );
    }

    /// The built-in manifest carries the expected id, label space and default
    /// flag.
    #[test]
    fn placeholder_manifest_three_pin_values() {
        let built_in = placeholder_manifest();

        assert_eq!(built_in.model_id, "openai-privacy-filter-v1");
        assert_eq!(built_in.label_space_version, "8class-v1");
        assert!(built_in.default, "单模型 placeholder 应 default = true");
    }
}