use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::path::Path;
use zenith_core::{AssetKind, BytesAssetProvider, KdlAdapter, KdlSource, Severity};
use zenith_render::render_png;
use zenith_scene::compile_page;
use zenith_tx::{Op, OpSpan, Transaction, TxStatus, run_transaction};
use crate::json_types::{DiagnosticJson, MergeOutput, MergeRowResult};
use crate::commands::render::{
build_asset_provider, build_font_provider, collect_missing_asset_diagnostics,
resolve_text_sources,
};
/// Error returned when a merge cannot proceed at all.
///
/// Carries the text to report and a numeric exit code
/// (presumably the process exit status chosen by the caller — confirm at the call site).
#[derive(Debug)]
pub struct MergeError {
    /// Description of what went wrong.
    pub message: String,
    /// Exit code associated with the error; [`MergeError::new`] always stores `2`.
    pub exit_code: u8,
}

impl MergeError {
    /// Creates an error from anything convertible into a `String`, with exit code `2`.
    fn new(msg: impl Into<String>) -> Self {
        let message: String = msg.into();
        MergeError {
            message,
            exit_code: 2,
        }
    }
}
/// Outcome of processing a single CSV record.
#[derive(Debug)]
pub struct RowResult {
    /// Zero-based position of the record among the CSV data records.
    pub row: usize,
    /// Raw value of the `--name-by` column for this record, when one was requested.
    pub key: Option<String>,
    /// File names written for this record; empty when the record failed.
    pub outputs: Vec<String>,
    /// Reason the record failed, or `None` when it succeeded.
    pub failure: Option<String>,
}
/// Per-record results of a merge run, in the order the records were processed.
#[derive(Debug)]
pub struct MergeReport {
    /// One entry per processed CSV record.
    pub rows: Vec<RowResult>,
}

impl MergeReport {
    /// Returns every output file name across all rows, in row order, as owned strings.
    pub fn written(&self) -> Vec<String> {
        let mut names: Vec<String> = Vec::new();
        for entry in &self.rows {
            names.extend_from_slice(&entry.outputs);
        }
        names
    }

    /// Returns references to the rows that carry a failure reason.
    pub fn failed(&self) -> Vec<&RowResult> {
        let mut failures: Vec<&RowResult> = Vec::new();
        for entry in &self.rows {
            if entry.failure.is_some() {
                failures.push(entry);
            }
        }
        failures
    }
}
/// Links a text node to the CSV column whose cell replaces its text.
struct DataBinding {
    /// Id of the text node in the template.
    node_id: String,
    /// Column name taken from the node's role after the `data.` prefix.
    column: String,
}
/// Links an image node to the CSV column whose cell names its image source.
struct AssetBinding {
    /// Id of the image node in the template.
    node_id: String,
    /// Column name taken from the node's role after the `data.` prefix.
    column: String,
}
/// Fails when `role` is a `data.`-prefixed role.
///
/// Called for node kinds that cannot take a data binding. `id` is the node's
/// id and only appears in the error message.
///
/// # Errors
/// Returns a [`MergeError`] naming the role and node when `role` starts with `data.`.
fn reject_data_role_on_non_text(role: Option<&str>, id: &str) -> Result<(), MergeError> {
    match role {
        Some(bound) if bound.starts_with("data.") => Err(MergeError::new(format!(
            "role=\"{}\" on non-text node {}: replace_text supports text nodes only",
            bound, id
        ))),
        // No role, or a role outside the `data.` namespace: nothing to reject.
        _ => Ok(()),
    }
}
fn collect_data_nodes(
nodes: &[zenith_core::Node],
out: &mut Vec<DataBinding>,
asset_out: &mut Vec<AssetBinding>,
) -> Result<(), MergeError> {
for node in nodes {
match node {
zenith_core::Node::Text(n) => {
if let Some(role) = n.role.as_deref()
&& let Some(col) = role.strip_prefix("data.")
{
out.push(DataBinding {
node_id: n.id.clone(),
column: col.to_owned(),
});
}
}
zenith_core::Node::Image(n) => {
if let Some(role) = n.role.as_deref()
&& let Some(col) = role.strip_prefix("data.")
{
asset_out.push(AssetBinding {
node_id: n.id.clone(),
column: col.to_owned(),
});
}
}
zenith_core::Node::Rect(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Ellipse(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Line(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Code(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Frame(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
collect_data_nodes(&n.children, out, asset_out)?;
}
zenith_core::Node::Group(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
collect_data_nodes(&n.children, out, asset_out)?;
}
zenith_core::Node::Polygon(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Polyline(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Instance(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Field(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Toc(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Footnote(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Table(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
for row in &n.rows {
for cell in &row.cells {
collect_data_nodes(&cell.children, out, asset_out)?;
}
}
}
zenith_core::Node::Shape(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Connector(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Pattern(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Chart(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Light(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Mesh(n) => {
reject_data_role_on_non_text(n.role.as_deref(), &n.id)?;
}
zenith_core::Node::Unknown(_n) => {
}
}
}
Ok(())
}
/// Turns an arbitrary string into a file-name stem.
///
/// Each of `/ \ : * ? " < > |` and NUL is replaced by `_`; leading and trailing
/// dots and whitespace are then removed. If nothing is left, `"_"` is returned.
pub fn sanitize_filename(s: &str) -> String {
    const FORBIDDEN: [char; 10] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        cleaned.push(if FORBIDDEN.contains(&ch) { '_' } else { ch });
    }

    match cleaned.trim_matches(|ch: char| ch == '.' || ch.is_whitespace()) {
        "" => String::from("_"),
        stem => stem.to_owned(),
    }
}
/// Renders PNG pages for every CSV record by binding the record's cells to the
/// template's `role="data.*"` nodes.
///
/// * `doc_src` - template source, parsed with `KdlAdapter`.
/// * `csv_src` - CSV text; its header record supplies the column names.
/// * `project_dir` - optional directory handed to the font/asset providers and to
///   text-source resolution; image cells are joined onto it to locate image files.
/// * `out_dir` - directory the PNG files are written to (created if missing).
/// * `name_by` - optional CSV column whose (sanitized) value becomes the output
///   file stem and whose raw value becomes the row's `key`.
///
/// Returns a [`MergeReport`] holding one [`RowResult`] per CSV record, in record order.
/// A problem confined to one record is stored as that row's `failure` and processing
/// moves on to the next record.
///
/// # Errors
/// Returns a [`MergeError`] when the template does not parse, no `data.*` bindings
/// exist, image bindings are present without `project_dir`, the CSV header cannot be
/// read, a bound column or the `name_by` column is absent from the header, the font or
/// asset provider cannot be built, or `out_dir` cannot be created.
pub fn run(
doc_src: &str,
csv_src: &str,
project_dir: Option<&Path>,
out_dir: &Path,
name_by: Option<&str>,
) -> Result<MergeReport, MergeError> {
// Parse the template; a parse failure aborts the whole merge.
let doc = KdlAdapter
.parse(doc_src.as_bytes())
.map_err(|e| MergeError::new(format!("error[parse.error]: {}", e.message)))?;
// Gather text bindings and image bindings from every page of the template.
let mut bindings: Vec<DataBinding> = Vec::new();
let mut asset_bindings: Vec<AssetBinding> = Vec::new();
for page in &doc.body.pages {
collect_data_nodes(&page.children, &mut bindings, &mut asset_bindings)?;
}
if bindings.is_empty() && asset_bindings.is_empty() {
return Err(MergeError::new("no role=\"data.*\" template nodes found"));
}
// Image cells are resolved relative to the project directory, so one is required.
if !asset_bindings.is_empty() && project_dir.is_none() {
return Err(MergeError::new(
"image data bindings require a project directory (the .zen file must be on disk)",
));
}
// Read the CSV from memory and map each header name to its column position.
// If a header name repeats, the later position overwrites the earlier one in the map.
let mut reader = csv::Reader::from_reader(csv_src.as_bytes());
let headers = reader
.headers()
.map_err(|e| MergeError::new(format!("CSV header error: {}", e)))?
.clone();
let header_index: BTreeMap<String, usize> = headers
.iter()
.enumerate()
.map(|(i, h)| (h.to_owned(), i))
.collect();
// Fail fast if any text binding names a column the header lacks.
let unknown: Vec<String> = bindings
.iter()
.filter(|b| !header_index.contains_key(&b.column))
.map(|b| b.column.clone())
.collect();
if !unknown.is_empty() {
return Err(MergeError::new(format!(
"CSV column(s) not found in header: {}",
unknown.join(", ")
)));
}
// Same check for image bindings; only reached once all text bindings resolve.
let unknown_asset: Vec<String> = asset_bindings
.iter()
.filter(|b| !header_index.contains_key(&b.column))
.map(|b| b.column.clone())
.collect();
if !unknown_asset.is_empty() {
return Err(MergeError::new(format!(
"CSV column(s) not found in header: {}",
unknown_asset.join(", ")
)));
}
if let Some(col) = name_by
&& !header_index.contains_key(col)
{
return Err(MergeError::new(format!(
"--name-by column {:?} not found in CSV header",
col
)));
}
// Resolve every binding's column to its position once, outside the row loop.
// The checks above already guarantee each lookup succeeds; the error arms are defensive.
let binding_indices: Vec<usize> = bindings
.iter()
.map(|b| -> Result<usize, MergeError> {
header_index
.get(&b.column)
.copied()
.ok_or_else(|| MergeError::new(format!("column {:?} not found", b.column)))
})
.collect::<Result<Vec<usize>, MergeError>>()?;
let asset_binding_indices: Vec<usize> = asset_bindings
.iter()
.map(|b| -> Result<usize, MergeError> {
header_index
.get(&b.column)
.copied()
.ok_or_else(|| MergeError::new(format!("column {:?} not found", b.column)))
})
.collect::<Result<Vec<usize>, MergeError>>()?;
let name_by_index: Option<usize> = match name_by {
None => None,
Some(col) => Some(
header_index
.get(col)
.copied()
.ok_or_else(|| MergeError::new(format!("--name-by column {:?} not found", col)))?,
),
};
// Fonts and the template-level asset provider are built once from the unmodified template.
let fonts =
build_font_provider(&doc, project_dir, false).map_err(|e| MergeError::new(e.message))?;
let template_assets = match project_dir {
Some(dir) => {
build_asset_provider(&doc, dir, false).map_err(|e| MergeError::new(e.message))?
}
None => BytesAssetProvider::new(),
};
std::fs::create_dir_all(out_dir).map_err(|e| {
MergeError::new(format!(
"could not create output directory '{}': {}",
out_dir.display(),
e
))
})?;
let mut rows: Vec<RowResult> = Vec::new();
// File names written by earlier successful rows of this run, for collision detection.
let mut used_names: BTreeSet<String> = BTreeSet::new();
// `row_idx` is the zero-based position of the record within `reader.records()`.
for (row_idx, record_result) in reader.records().enumerate() {
let record = match record_result {
Ok(r) => r,
Err(e) => {
// A malformed record fails only this row; no key is available for it.
push_failure(&mut rows, row_idx, None, format!("CSV read error: {}", e));
continue;
}
};
// Raw (unsanitized) value of the --name-by column, reported as the row's key.
let row_key: Option<String> =
name_by_index.map(|col_idx| record.get(col_idx).unwrap_or("").to_owned());
// One ReplaceText op per text binding, carrying a single span with no style overrides.
// A record that is too short to contain the column yields an empty string.
let mut ops: Vec<Op> = bindings
.iter()
.zip(binding_indices.iter())
.map(|(binding, &col_idx)| {
let cell = record.get(col_idx).unwrap_or("");
Op::ReplaceText {
node: binding.node_id.clone(),
spans: vec![OpSpan {
text: cell.to_owned(),
fill: None,
font_weight: None,
italic: None,
underline: None,
strikethrough: None,
vertical_align: None,
footnote_ref: None,
}],
}
})
.collect();
// For each non-blank image cell, add a row-scoped asset and point the image node at it.
// Blank (after trimming) cells produce no ops for that node.
for (binding, &col_idx) in asset_bindings.iter().zip(asset_binding_indices.iter()) {
let cell = record.get(col_idx).unwrap_or("").trim();
if cell.is_empty() {
continue;
}
let asset_id = row_asset_id(row_idx, &binding.column);
ops.push(Op::AddAsset {
id: asset_id.clone(),
kind: "image".to_owned(),
src: cell.to_owned(),
sha256: None,
});
ops.push(Op::SetAsset {
node_id: binding.node_id.clone(),
asset_id,
});
}
let tx = Transaction {
ops,
permissions: Default::default(),
};
// Apply the ops to the template. Engine errors and rejections fail only this row.
let tx_result = match run_transaction(&doc, &tx) {
Ok(r) => r,
Err(e) => {
push_failure(
&mut rows,
row_idx,
row_key,
format!("transaction engine error: {}", e.message),
);
continue;
}
};
if tx_result.status == TxStatus::Rejected {
let msgs: Vec<String> = tx_result
.diagnostics
.iter()
.map(|d| {
format!(
"{}[{}]: {}",
crate::json_types::severity_str(&d.severity),
d.code,
d.message
)
})
.collect();
push_failure(
&mut rows,
row_idx,
row_key,
format!("transaction rejected: {}", msgs.join("; ")),
);
continue;
}
// Re-parse the post-transaction source to obtain this row's document.
let mut row_doc = match KdlAdapter.parse(tx_result.source_after.as_bytes()) {
Ok(d) => d,
Err(e) => {
push_failure(
&mut rows,
row_idx,
row_key,
format!("post-transaction parse error: {}", e.message),
);
continue;
}
};
// Resolve text sources; only Error-severity diagnostics fail the row.
{
let mut text_src_diags: Vec<zenith_core::Diagnostic> = Vec::new();
resolve_text_sources(&mut row_doc, project_dir, &mut text_src_diags);
let hard: Vec<String> = text_src_diags
.iter()
.filter(|d| d.severity == Severity::Error)
.map(crate::commands::format_error_diag)
.collect();
if !hard.is_empty() {
push_failure(
&mut rows,
row_idx,
row_key,
format!("text source error(s): {}", hard.join("; ")),
);
continue;
}
}
// When image bindings exist, each row gets its own provider: a fresh one built from
// the template, plus the bytes of this row's image files registered under row-scoped ids.
let row_assets = if asset_bindings.is_empty() {
None
} else {
let Some(dir) = project_dir else {
push_failure(
&mut rows,
row_idx,
row_key,
"internal: project directory unexpectedly missing".to_owned(),
);
continue;
};
// NOTE(review): this `?` returns from `run` and drops the rows collected so far,
// unlike the other per-row problems in this loop, which record a failure and
// `continue`. Confirm that aborting the whole merge here is intended.
let mut row_provider =
build_asset_provider(&doc, dir, false).map_err(|e| MergeError::new(e.message))?;
let mut row_asset_missing = false;
for (binding, &col_idx) in asset_bindings.iter().zip(asset_binding_indices.iter()) {
let cell = record.get(col_idx).unwrap_or("").trim();
if cell.is_empty() {
continue;
}
// Must produce the same id that the AddAsset/SetAsset ops used above.
let asset_id = row_asset_id(row_idx, &binding.column);
let img_path = dir.join(cell);
match std::fs::read(&img_path) {
Ok(bytes) => {
row_provider.register(&asset_id, AssetKind::Image, bytes.into());
}
Err(e) => {
push_failure(
&mut rows,
row_idx,
row_key.clone(),
format!(
"error[asset.missing]: asset '{}' file not found: '{}': {}",
asset_id,
img_path.display(),
e
),
);
// The flag carries the failure out of this inner loop so the row can be skipped.
row_asset_missing = true;
break;
}
}
}
if row_asset_missing {
continue;
}
Some(row_provider)
};
// Check the row document for missing assets; Error-severity diagnostics fail the row.
if let Some(dir) = project_dir {
let missing_diags = collect_missing_asset_diagnostics(&row_doc, dir);
let hard: Vec<String> = missing_diags
.iter()
.filter(|d| d.severity == Severity::Error)
.map(crate::commands::format_error_diag)
.collect();
if !hard.is_empty() {
push_failure(
&mut rows,
row_idx,
row_key,
format!("asset error(s): {}", hard.join("; ")),
);
continue;
}
}
let page_count = row_doc.body.pages.len();
if page_count == 0 {
push_failure(
&mut rows,
row_idx,
row_key,
"row document has no pages".to_owned(),
);
continue;
}
// Output stem: the sanitized --name-by value, or "row-NNNN" with a 1-based,
// zero-padded record number.
let row_stem = match name_by_index {
Some(col_idx) => sanitize_filename(record.get(col_idx).unwrap_or("")),
None => format!("row-{:04}", row_idx + 1),
};
let page_filenames: Vec<String> = (0..page_count)
.map(|pi| page_filename(&row_stem, pi, page_count))
.collect();
// Refuse to overwrite a file already produced by an earlier row of this run.
let mut collided = false;
for fname in &page_filenames {
if used_names.contains(fname) {
push_failure(
&mut rows,
row_idx,
row_key.clone(),
format!("output filename collision: {fname}"),
);
collided = true;
break;
}
}
if collided {
continue;
}
// Compile and render every page into memory before writing anything, so a page
// failure leaves no files on disk for this row.
let mut page_failures: Vec<String> = Vec::new();
let mut page_pngs: Vec<(String, Vec<u8>)> = Vec::new();
for (page_index, page_fname) in page_filenames.iter().enumerate() {
let compile_result = compile_page(&row_doc, &fonts, page_index, None);
let hard_diags: Vec<String> = compile_result
.diagnostics
.iter()
.filter(|d| d.severity == Severity::Error)
.map(crate::commands::format_error_diag)
.collect();
if !hard_diags.is_empty() {
page_failures.push(format!(
"page {}: compile error(s): {}",
page_index + 1,
hard_diags.join("; ")
));
continue;
}
// Use the row-specific provider when one was built, else the shared template provider.
let png_result = match &row_assets {
Some(ra) => render_png(&compile_result.scene, &fonts, ra),
None => render_png(&compile_result.scene, &fonts, &template_assets),
};
match png_result {
Ok(bytes) => {
page_pngs.push((page_fname.clone(), bytes));
}
Err(e) => {
page_failures.push(format!("page {}: render error: {}", page_index + 1, e));
}
}
}
if !page_failures.is_empty() {
push_failure(&mut rows, row_idx, row_key, page_failures.join("; "));
continue;
}
// Write the pages in order, stopping at the first write error.
// NOTE(review): pages written before a failing one stay on disk but are neither listed
// in the row's outputs nor added to `used_names` — confirm this is acceptable.
let mut write_failed = false;
let mut newly_written: Vec<String> = Vec::new();
for (fname, bytes) in page_pngs {
let out_path = out_dir.join(&fname);
if let Err(e) = std::fs::write(&out_path, &bytes) {
push_failure(
&mut rows,
row_idx,
row_key.clone(),
format!("write error '{}': {}", out_path.display(), e),
);
write_failed = true;
break;
}
newly_written.push(fname);
}
if write_failed {
continue;
}
// Only fully written rows reserve their file names and are recorded as successes.
for fname in &newly_written {
used_names.insert(fname.clone());
}
rows.push(RowResult {
row: row_idx,
key: row_key,
outputs: newly_written,
failure: None,
});
}
Ok(MergeReport { rows })
}
/// Appends a failed [`RowResult`] (no outputs, `failure` set to `reason`) to `rows`.
fn push_failure(rows: &mut Vec<RowResult>, row: usize, key: Option<String>, reason: String) {
    let failed = RowResult {
        row,
        key,
        outputs: vec![],
        failure: Some(reason),
    };
    rows.push(failed);
}
/// Builds the asset id for one row/column pair, in the form
/// `merge.row.<row_idx>.asset.<column>`.
fn row_asset_id(row_idx: usize, column: &str) -> String {
    let index_text = row_idx.to_string();
    ["merge.row.", index_text.as_str(), ".asset.", column].concat()
}
/// Names the PNG for one page of a row.
///
/// A single-page document yields `<stem>.png`; any other page count yields
/// `<stem>-page-<n>.png`, where `n` is the 1-based page number.
fn page_filename(stem: &str, page_index: usize, page_count: usize) -> String {
    match page_count {
        1 => format!("{stem}.png"),
        _ => {
            let ordinal = page_index + 1;
            format!("{stem}-page-{ordinal}.png")
        }
    }
}
pub fn build_manifest(
doc_src: &str,
csv_src: &str,
name_by: Option<&str>,
report: &MergeReport,
) -> crate::json_types::MergeManifest {
use sha2::{Digest, Sha256};
const MANIFEST_FORMAT_VERSION: &str = "1";
let source_sha256 = format!("{:x}", Sha256::digest(doc_src.as_bytes()));
let data_sha256 = format!("{:x}", Sha256::digest(csv_src.as_bytes()));
let rows = report
.rows
.iter()
.filter(|r| r.failure.is_none())
.map(|r| crate::json_types::ManifestRow {
row: r.row,
key: r.key.clone(),
outputs: r.outputs.clone(),
})
.collect();
crate::json_types::MergeManifest {
schema: "zenith-merge-manifest-v1",
generator: MANIFEST_FORMAT_VERSION,
source_sha256,
data_sha256,
name_by: name_by.map(str::to_owned),
rows,
}
}
/// Converts a [`MergeReport`] into its JSON output shape.
///
/// Rows without a failure get status `"ok"` and no diagnostics. Rows with a
/// failure get status `"failed"` and a single `merge.row.failed` error
/// diagnostic carrying the failure reason.
pub fn to_json_output(report: &MergeReport) -> MergeOutput {
    let total = report.rows.len();
    let ok_count = report
        .rows
        .iter()
        .filter(|entry| entry.failure.is_none())
        .count();

    let row_results = report.rows.iter().map(|entry| {
        let (status, diagnostics) = match &entry.failure {
            None => ("ok", Vec::new()),
            Some(reason) => (
                "failed",
                vec![DiagnosticJson {
                    code: "merge.row.failed".to_owned(),
                    severity: "error".to_owned(),
                    message: reason.clone(),
                    subject_id: None,
                }],
            ),
        };
        MergeRowResult {
            row: entry.row,
            key: entry.key.clone(),
            status,
            outputs: entry.outputs.clone(),
            diagnostics,
        }
    });

    MergeOutput {
        schema: "zenith-merge-v1",
        total_rows: total,
        written: ok_count,
        // Every row either has a failure or not, so the rest are the failed ones.
        failed: total - ok_count,
        rows: row_results.collect(),
    }
}