Skip to main content

zig_core/workflow/
parser.rs

1use std::io::Read;
2use std::path::{Path, PathBuf};
3
4use crate::error::ZigError;
5use crate::workflow::model::Workflow;
6
7/// Parse a workflow from a TOML string.
8pub fn parse(content: &str) -> Result<Workflow, ZigError> {
9    let workflow: Workflow = toml::from_str(content).map_err(|e| ZigError::Parse(e.to_string()))?;
10    Ok(workflow)
11}
12
13/// Parse a plain `.zwf` workflow file from disk.
14///
15/// This does not handle `.zwfz` zip archives — use [`parse_workflow`] for
16/// that. If the file is a zip archive, it is extracted to a temp directory
17/// and the TOML workflow inside is parsed. The returned `WorkflowSource`
18/// must be kept alive for the duration of execution — dropping it cleans
19/// up any temp directory.
20pub fn parse_file(path: &Path) -> Result<Workflow, ZigError> {
21    let content = std::fs::read_to_string(path)
22        .map_err(|e| ZigError::Io(format!("failed to read {}: {e}", path.display())))?;
23    parse(&content)
24}
25
26/// Parse a workflow file, handling both plain `.zwf` and zipped `.zwfz`.
27///
28/// Returns the parsed `Workflow` and a `WorkflowSource` that tracks the
29/// effective directory for resolving relative file paths. The source must
30/// be kept alive during execution.
31pub fn parse_workflow(path: &Path) -> Result<(Workflow, WorkflowSource), ZigError> {
32    if is_zip_archive(path)? {
33        parse_zip(path)
34    } else {
35        let content = std::fs::read_to_string(path)
36            .map_err(|e| ZigError::Io(format!("failed to read {}: {e}", path.display())))?;
37        let wf = parse(&content)?;
38        let dir = path
39            .parent()
40            .unwrap_or_else(|| Path::new("."))
41            .to_path_buf();
42        Ok((wf, WorkflowSource::Directory(dir)))
43    }
44}
45
46/// Tracks where a workflow's associated files live.
47///
48/// For plain `.zwf` files, this is the parent directory. For `.zwfz` zip
49/// archives, this is a temp directory containing the extracted contents.
50/// Dropping the `Zip` variant cleans up the temp directory.
51#[derive(Debug)]
52pub enum WorkflowSource {
53    /// Plain TOML file on disk — resolve paths relative to this directory.
54    Directory(PathBuf),
55    /// Extracted zip archive — temp dir is cleaned up on drop.
56    Zip {
57        _temp_dir: tempfile::TempDir,
58        extract_dir: PathBuf,
59    },
60}
61
62impl WorkflowSource {
63    /// Get the effective directory for resolving relative file paths.
64    pub fn dir(&self) -> &Path {
65        match self {
66            WorkflowSource::Directory(dir) => dir,
67            WorkflowSource::Zip { extract_dir, .. } => extract_dir,
68        }
69    }
70}
71
72/// Check if a file is a zip archive by reading its magic bytes.
73fn is_zip_archive(path: &Path) -> Result<bool, ZigError> {
74    let mut file = std::fs::File::open(path)
75        .map_err(|e| ZigError::Io(format!("failed to open {}: {e}", path.display())))?;
76    let mut magic = [0u8; 4];
77    match file.read_exact(&mut magic) {
78        Ok(()) => Ok(&magic == b"PK\x03\x04"),
79        Err(_) => Ok(false), // File too short to be a zip
80    }
81}
82
/// Upper bound on the number of entries accepted in a zip archive.
///
/// Together with [`MAX_ZIP_TOTAL_BYTES`] this is a hard cap for zip
/// extraction: a small malicious archive can expand to many gigabytes (a
/// "zip bomb"), and these limits bound the damage an untrusted `.zwfz` can
/// cause the host.
const MAX_ZIP_ENTRIES: usize = 10_000;

/// Upper bound on the cumulative number of bytes extracted from one archive
/// (100 MiB).
const MAX_ZIP_TOTAL_BYTES: u64 = 100 * 1024 * 1024;
88
89/// Extract a zip archive into a destination directory.
90///
91/// Used by both [`parse_zip`] (into a temp directory) and
92/// `update::run_update` (into a staging directory for in-place editing).
93/// Returns an error if any entry has an invalid path, is a symlink, or
94/// if the cumulative extracted size exceeds [`MAX_ZIP_TOTAL_BYTES`].
95pub fn extract_zip(archive_path: &Path, dest: &Path) -> Result<(), ZigError> {
96    let file = std::fs::File::open(archive_path)
97        .map_err(|e| ZigError::Io(format!("failed to open {}: {e}", archive_path.display())))?;
98    let mut archive = zip::ZipArchive::new(file)
99        .map_err(|e| ZigError::Parse(format!("failed to read zip archive: {e}")))?;
100
101    if archive.len() > MAX_ZIP_ENTRIES {
102        return Err(ZigError::Parse(format!(
103            "zip archive has {} entries (max {})",
104            archive.len(),
105            MAX_ZIP_ENTRIES
106        )));
107    }
108
109    let mut total_written: u64 = 0;
110
111    for i in 0..archive.len() {
112        let mut entry = archive
113            .by_index(i)
114            .map_err(|e| ZigError::Parse(format!("failed to read zip entry: {e}")))?;
115
116        // Reject symlink entries up-front: they can escape `dest` on
117        // extraction and shift the whole tree out from under us.
118        if let Some(mode) = entry.unix_mode() {
119            if mode & 0o170000 == 0o120000 {
120                return Err(ZigError::Parse(
121                    "zip archive contains a symlink entry (rejected)".into(),
122                ));
123            }
124        }
125
126        let out_path = dest.join(
127            entry
128                .enclosed_name()
129                .ok_or_else(|| ZigError::Parse("zip entry has invalid path".into()))?,
130        );
131
132        if entry.is_dir() {
133            std::fs::create_dir_all(&out_path).map_err(|e| {
134                ZigError::Io(format!(
135                    "failed to create directory {}: {e}",
136                    out_path.display()
137                ))
138            })?;
139        } else {
140            if let Some(parent) = out_path.parent() {
141                std::fs::create_dir_all(parent).map_err(|e| {
142                    ZigError::Io(format!(
143                        "failed to create directory {}: {e}",
144                        parent.display()
145                    ))
146                })?;
147            }
148            let mut outfile = std::fs::File::create(&out_path).map_err(|e| {
149                ZigError::Io(format!("failed to create file {}: {e}", out_path.display()))
150            })?;
151
152            // Enforce the cumulative-size cap by using a `take` reader —
153            // stop reading once we hit the remaining budget so decompressed
154            // bombs can't blow past the limit.
155            let remaining = MAX_ZIP_TOTAL_BYTES.saturating_sub(total_written);
156            let mut limited = std::io::Read::take(&mut entry, remaining + 1);
157            let written = std::io::copy(&mut limited, &mut outfile).map_err(|e| {
158                ZigError::Io(format!("failed to extract {}: {e}", out_path.display()))
159            })?;
160            total_written = total_written.saturating_add(written);
161            if total_written > MAX_ZIP_TOTAL_BYTES {
162                return Err(ZigError::Parse(format!(
163                    "zip archive expands to more than {} bytes (zip bomb protection)",
164                    MAX_ZIP_TOTAL_BYTES
165                )));
166            }
167        }
168    }
169
170    Ok(())
171}
172
173/// Parse a `.zwfz` zip archive.
174///
175/// Extracts the archive to a temp directory, finds the single TOML workflow
176/// file inside, and parses it.
177fn parse_zip(path: &Path) -> Result<(Workflow, WorkflowSource), ZigError> {
178    let temp_dir = tempfile::TempDir::new()
179        .map_err(|e| ZigError::Io(format!("failed to create temp directory: {e}")))?;
180
181    extract_zip(path, temp_dir.path())?;
182
183    // Find the single TOML workflow file
184    let toml_files: Vec<PathBuf> = find_workflow_files(temp_dir.path())?;
185
186    if toml_files.is_empty() {
187        return Err(ZigError::Parse(
188            "zip archive contains no .toml or .zwf workflow file".into(),
189        ));
190    }
191    if toml_files.len() > 1 {
192        return Err(ZigError::Parse(format!(
193            "zip archive contains {} workflow files (expected exactly one): {}",
194            toml_files.len(),
195            toml_files
196                .iter()
197                .map(|p| p.display().to_string())
198                .collect::<Vec<_>>()
199                .join(", ")
200        )));
201    }
202
203    let toml_path = &toml_files[0];
204    let content = std::fs::read_to_string(toml_path)
205        .map_err(|e| ZigError::Io(format!("failed to read {}: {e}", toml_path.display())))?;
206    let wf = parse(&content)?;
207
208    // The effective dir is the parent of the toml file within the temp dir
209    let extract_dir = toml_path.parent().unwrap_or(temp_dir.path()).to_path_buf();
210
211    Ok((
212        wf,
213        WorkflowSource::Zip {
214            _temp_dir: temp_dir,
215            extract_dir,
216        },
217    ))
218}
219
220/// Recursively find `.toml` and `.zwf` workflow files in a directory
221/// (only the top level and immediate subdirectories).
222pub fn find_workflow_files(dir: &Path) -> Result<Vec<PathBuf>, ZigError> {
223    let mut results = Vec::new();
224
225    fn scan_dir(dir: &Path, results: &mut Vec<PathBuf>, depth: usize) -> Result<(), ZigError> {
226        let entries = std::fs::read_dir(dir).map_err(|e| {
227            ZigError::Io(format!("failed to read directory {}: {e}", dir.display()))
228        })?;
229
230        for entry in entries {
231            let entry =
232                entry.map_err(|e| ZigError::Io(format!("failed to read directory entry: {e}")))?;
233            let path = entry.path();
234
235            if path.is_file() {
236                if let Some(ext) = path.extension() {
237                    if ext == "toml" || ext == "zwf" {
238                        // Quick check: does it look like a workflow TOML?
239                        if let Ok(content) = std::fs::read_to_string(&path) {
240                            if content.contains("[workflow]") {
241                                results.push(path);
242                            }
243                        }
244                    }
245                }
246            } else if path.is_dir() && depth < 1 {
247                scan_dir(&path, results, depth + 1)?;
248            }
249        }
250        Ok(())
251    }
252
253    scan_dir(dir, &mut results, 0)?;
254    Ok(results)
255}
256
257/// Serialize a workflow back to TOML (for the `create` command).
258pub fn to_toml(workflow: &Workflow) -> Result<String, ZigError> {
259    toml::to_string_pretty(workflow).map_err(|e| ZigError::Serialize(e.to_string()))
260}
261
262#[cfg(test)]
263#[path = "parser_tests.rs"]
264mod tests;