use std::io::{self, Cursor, Write};
use zip::write::SimpleFileOptions;
use zip::ZipWriter;
/// Descriptive metadata stamped into an exported translation bundle.
///
/// The values are copied verbatim into `manifest.hjson` and interpolated into
/// the generated `README.md`; nothing here is derived from the rows that are
/// actually written to the archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleMeta {
    /// Name of the constructed language (used as the README title and as the
    /// manifest `language` value).
    pub language: String,
    /// Export date as display text (the README prints it after "on").
    pub exported: String,
    /// Version of Inkhaven that produced the export.
    pub inkhaven_version: String,
    /// Number of translation-memory pairs reported in the manifest and README.
    pub memory_pairs: usize,
    /// Number of lexicon entries reported in the manifest and README.
    pub lexicon_entries: usize,
}
/// Flattens `s` into a single TSV cell.
///
/// Every tab, line feed and carriage return is swapped for one space, so a
/// value can never introduce an extra column or row. Replacement is
/// character-for-character: a `\r\n` pair becomes two spaces.
fn tsv(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            '\t' | '\n' | '\r' => ' ',
            other => other,
        })
        .collect()
}
fn map_zip(e: zip::result::ZipError) -> io::Error {
io::Error::new(io::ErrorKind::Other, e)
}
/// Renders `s` as a double-quoted string using JSON escape sequences, which is
/// the form Hjson accepts for quoted strings.
///
/// `{:?}` is not a substitute: Rust's `Debug` output for `str` uses escapes
/// such as `\u{301}` and `\0` that JSON/Hjson parsers reject.
fn hjson_quoted(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for ch in s.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining C0 control characters must be written as \uXXXX.
            c if u32::from(c) < 0x20 => out.push_str(&format!("\\u{:04x}", u32::from(c))),
            c => out.push(c),
        }
    }
    out.push('"');
    out
}

/// Packs a translation memory and lexicon into an in-memory zip archive.
///
/// The archive contains, in this order:
///
/// - `manifest.hjson` — the fields of `meta` plus a fixed `format` tag;
/// - `memory.tsv` — an `english`/`conlang` header followed by one row per
///   element of `memory`;
/// - `lexicon.tsv` — a `headword`/`pos`/`gloss` header followed by one row per
///   element of `lexicon`;
/// - `README.md` — the text produced by `readme(meta)`.
///
/// Every TSV cell is passed through `tsv`, so embedded tabs and line breaks
/// cannot break the table layout. The counts shown in the manifest and README
/// are taken from `meta`, not from the lengths of `memory` and `lexicon`.
///
/// Returns the raw bytes of the finished archive.
///
/// # Errors
///
/// Returns an [`io::Error`] if writing entry data fails; errors reported by
/// the zip writer are converted through `map_zip`.
pub fn bundle(
    meta: &BundleMeta,
    memory: &[(String, String)],
    lexicon: &[(String, String, String)],
) -> io::Result<Vec<u8>> {
    let mut zw = ZipWriter::new(Cursor::new(Vec::new()));

    // String values are quoted with JSON escapes so the manifest stays
    // parseable whatever characters the language name or date contain.
    let manifest = format!(
        "{{\n \
         format: \"inkhaven-translation-memory/v1\"\n \
         language: {}\n \
         exported: {}\n \
         inkhaven_version: {}\n \
         memory_pairs: {}\n \
         lexicon_entries: {}\n\
         }}\n",
        hjson_quoted(&meta.language),
        hjson_quoted(&meta.exported),
        hjson_quoted(&meta.inkhaven_version),
        meta.memory_pairs,
        meta.lexicon_entries,
    );
    zw.start_file("manifest.hjson", SimpleFileOptions::default())
        .map_err(map_zip)?;
    zw.write_all(manifest.as_bytes())?;

    zw.start_file("memory.tsv", SimpleFileOptions::default())
        .map_err(map_zip)?;
    zw.write_all(b"english\tconlang\n")?;
    for (en, con) in memory {
        writeln!(zw, "{}\t{}", tsv(en), tsv(con))?;
    }

    zw.start_file("lexicon.tsv", SimpleFileOptions::default())
        .map_err(map_zip)?;
    zw.write_all(b"headword\tpos\tgloss\n")?;
    for (w, pos, gloss) in lexicon {
        writeln!(zw, "{}\t{}\t{}", tsv(w), tsv(pos), tsv(gloss))?;
    }

    zw.start_file("README.md", SimpleFileOptions::default())
        .map_err(map_zip)?;
    zw.write_all(readme(meta).as_bytes())?;

    // `finish` writes the central directory and hands back the cursor.
    let cursor = zw.finish().map_err(map_zip)?;
    Ok(cursor.into_inner())
}
/// Produces the Markdown README that ships inside the bundle.
///
/// The text has three parts: an introduction naming the language, Inkhaven
/// version and export date; a "Contents" list describing the archive entries
/// with the counts taken from `meta`; and a usage section showing the
/// `inkhaven language remember` command. The lower-cased language name is used
/// where the text refers to the conlang column/placeholder.
fn readme(meta: &BundleMeta) -> String {
    let lang = meta.language.as_str();
    let lang_l = lang.to_lowercase();
    let ver = meta.inkhaven_version.as_str();
    let date = meta.exported.as_str();
    let pairs = meta.memory_pairs;
    let lex = meta.lexicon_entries;

    let intro = format!(
        "# {lang} — translation pack\n\
         \n\
         A portable translation system for the constructed language **{lang}**,\n\
         exported from Inkhaven {ver} on {date}.\n\
         \n\
         This is a *retrieval*-based translation system: the knowledge is a\n\
         **translation memory** of confirmed `English → {lang}` sentence pairs,\n\
         not a trained neural model. There is nothing to install and no GPU.\n\
         \n"
    );

    let contents = format!(
        "## Contents\n\
         \n\
         - `memory.tsv` — {pairs} confirmed translations (tab-separated\n\
         `english`, `{lang_l}`). Browse it as a phrasebook, or look a sentence up\n\
         directly.\n\
         - `lexicon.tsv` — {lex} dictionary entries (`headword`, `pos`, `gloss`).\n\
         - `manifest.hjson` — metadata.\n\
         \n"
    );

    let usage = format!(
        "## Using it with Inkhaven\n\
         \n\
         Re-seed another project's memory by replaying the pairs:\n\
         \n\
         ```sh\n\
         # for each row of memory.tsv:\n\
         inkhaven language remember {lang} --english \"<english>\" --conlang \"<{lang_l}>\"\n\
         ```\n\
         \n\
         The embeddings that power semantic recall are not shipped (they are\n\
         large and regenerate automatically the next time the pairs are used).\n"
    );

    [intro, contents, usage].concat()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;

    /// Round-trips a one-pair, one-entry bundle through the zip reader and
    /// checks that all four entries exist and that the memory row is intact.
    #[test]
    fn bundle_is_a_readable_zip_with_the_expected_entries() {
        let meta = BundleMeta {
            language: String::from("Eldar"),
            exported: String::from("2026-06-22"),
            inkhaven_version: String::from("1.3.23"),
            memory_pairs: 1,
            lexicon_entries: 1,
        };
        let memory = vec![(
            String::from("the bird sees the stone"),
            String::from("kira nami pata"),
        )];
        let lexicon = vec![(
            String::from("kira"),
            String::from("noun"),
            String::from("bird"),
        )];

        let bytes = bundle(&meta, &memory, &lexicon).unwrap();
        let mut archive = zip::ZipArchive::new(Cursor::new(bytes)).unwrap();

        // Collect every entry name up front so the checks below are order-independent.
        let mut names: Vec<String> = Vec::with_capacity(archive.len());
        for index in 0..archive.len() {
            let entry = archive.by_index(index).unwrap();
            names.push(entry.name().to_string());
        }
        for expected in ["manifest.hjson", "memory.tsv", "lexicon.tsv", "README.md"] {
            assert!(names.iter().any(|name| name.as_str() == expected));
        }

        let mut memory_text = String::new();
        archive
            .by_name("memory.tsv")
            .unwrap()
            .read_to_string(&mut memory_text)
            .unwrap();
        assert!(memory_text.contains("the bird sees the stone\tkira nami pata"));
    }
}