use std::{
borrow::Cow,
fs::File,
io::{BufWriter, Seek, Write},
path::PathBuf,
};
use crate::{
builder::{
base_binary::{BaseBinaryResolver, TargetSpec},
config::BuildConfig,
model::ModelTypeExt as _,
terminal,
},
format::{
assets::{AssetKind, AssetPlan, AssetSource, PlannedAsset},
codec::EncoderfileCodec,
},
generated::manifest::Backend,
};
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
/// Drives the build of an encoderfile from a [`BuildConfig`].
///
/// `#[serde(transparent)]` makes this wrapper serialize and deserialize
/// exactly as its single `config` field does.
#[derive(Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct EncoderfileBuilder {
/// Build configuration read by the builder's methods.
pub config: BuildConfig,
}
impl EncoderfileBuilder {
pub fn new(config: BuildConfig) -> EncoderfileBuilder {
Self { config }
}
pub fn from_file(file_path: &PathBuf) -> Result<EncoderfileBuilder> {
BuildConfig::load(file_path).map(|config| EncoderfileBuilder { config })
}
pub fn build(&self, runtime_version: &Option<String>, no_download: bool) -> Result<()> {
let target = self
.config
.encoderfile
.target()?
.unwrap_or(TargetSpec::detect_host()?);
let base_path = {
let cache_dir = self.config.encoderfile.cache_dir();
let base_binary_path = self.config.encoderfile.base_binary_path.as_deref();
let resolver = BaseBinaryResolver {
cache_dir: cache_dir.as_path(),
base_binary_path,
target,
runtime_version: runtime_version.clone(),
};
resolver.resolve(no_download)?
};
let mut planned_assets: Vec<PlannedAsset<'_>> = Vec::new();
let model_config = self.config.encoderfile.model_config()?;
planned_assets.push(PlannedAsset::from_asset_source(
AssetSource::InMemory(Cow::Owned(serde_json::to_vec(&model_config)?)),
AssetKind::ModelConfig,
)?);
terminal::success("Model config validated");
let model_weights_path = self.config.encoderfile.path.model_weights_path()?;
let model_asset = self
.config
.encoderfile
.model_type
.validate_model(&model_weights_path)?;
planned_assets.push(model_asset);
terminal::success("Model weights validated");
if let Some(asset) =
crate::builder::transforms::validate_transform(&self.config.encoderfile, &model_config)?
{
planned_assets.push(asset);
terminal::success("Transform validated");
}
let tokenizer_asset =
crate::builder::tokenizer::validate_tokenizer(&self.config.encoderfile)?;
planned_assets.push(tokenizer_asset);
terminal::success("Tokenizer validated");
terminal::info("Writing encoderfile...");
let output_path = self.config.encoderfile.output_path();
let out = File::create(output_path.clone()).context(format!(
"Failed to create final encoderfile at {:?}",
output_path.as_path()
))?;
let mut out = BufWriter::new(out);
let mut base = File::open(base_path.as_path()).context(format!(
"Failed to open base binary at {:?}",
base_path.as_path()
))?;
std::io::copy(&mut base, &mut out).context(format!(
"Failed to copy base binary to {:?}",
output_path.as_path()
))?;
let payload_start = out.stream_position()?;
let codec = EncoderfileCodec::new(payload_start);
let asset_plan = AssetPlan::new(planned_assets)?;
codec.write(
self.config.encoderfile.name.clone(),
self.config.encoderfile.version.clone(),
self.config.encoderfile.model_type.clone(),
Backend::Cpu,
&asset_plan,
&mut out,
)?;
out.flush()?;
terminal::success_kv("Encoderfile written to", output_path.display());
Ok(())
}
}