use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::path::Path;
use thiserror::Error;
use super::types::{
CargoToml, LibraryInfo, PackageInfo, PackageManifest, TaskInfo, CLOACINA_VERSION,
EXECUTE_TASK_SYMBOL,
};
/// Upper bound on the element count that `validate_slice` accepts. Larger counts are
/// rejected with `ManifestError::TooManyTasks` before the pointer itself is inspected.
const MAX_TASKS: usize = 10_000;
/// Pattern for a `#[packaged_workflow(...)]` attribute whose argument list contains
/// `package = "<name>"`; capture group 1 is the quoted name.
///
/// The text around `package = "..."` is matched with `[^)]*`, so an argument list that
/// contains a `)` before the closing parenthesis of the attribute will not match.
///
/// The regex is built lazily the first time the static is dereferenced. The `expect` can
/// only fire if the pattern literal itself is invalid.
static PACKAGED_WORKFLOW_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"#\[packaged_workflow\s*\(\s*[^)]*package\s*=\s*"([^"]+)"[^)]*\)\s*\]"#)
.expect("Invalid packaged_workflow regex pattern - this is a compile-time bug")
});
/// Failures raised while validating raw pointers and decoding metadata read from a
/// compiled workflow library.
#[derive(Debug, Error)]
pub enum ManifestError {
/// A required pointer was null (returned by `validate_ptr`).
#[error("Null pointer encountered for field: {field}")]
NullPointer { field: &'static str },
/// A pointer's address is not a multiple of the target type's alignment
/// (returned by `validate_ptr` and `validate_slice`).
#[error("Misaligned pointer for field: {field}")]
MisalignedPointer { field: &'static str },
/// A required C string pointer was null (returned by `safe_cstr_to_string`).
#[error("Null string pointer for field: {field}")]
NullString { field: String },
/// The bytes of a C string were not valid UTF-8; `source` carries the decoding error.
#[error("Invalid UTF-8 in field '{field}': {source}")]
InvalidUtf8 {
field: String,
#[source]
source: std::str::Utf8Error,
},
/// A task's dependencies string could not be parsed as a JSON array of strings.
#[error("Invalid dependencies JSON for task '{task_id}': {source}")]
InvalidDependencies {
task_id: String,
#[source]
source: serde_json::Error,
},
/// The slice pointer was null although the declared element count was non-zero
/// (returned by `validate_slice`).
#[error("Null task slice with non-zero count ({count})")]
NullTaskSlice { count: usize },
/// The declared element count exceeds the cap enforced by `validate_slice`.
#[error("Task count {count} exceeds maximum allowed ({max})")]
TooManyTasks { count: usize, max: usize },
/// The workflow graph JSON could not be deserialized.
#[error("Invalid graph data JSON: {source}")]
InvalidGraphData {
#[source]
source: serde_json::Error,
},
/// Free-form library failure. NOTE(review): not constructed in the code visible here;
/// presumably used by callers elsewhere - confirm.
#[error("Library error: {message}")]
LibraryError { message: String },
}
/// Copies a required NUL-terminated C string into an owned `String`.
///
/// `field_name` is only used to label the error.
///
/// # Errors
///
/// * `ManifestError::NullString` when `ptr` is null.
/// * `ManifestError::InvalidUtf8` when the bytes before the terminator are not UTF-8.
///
/// Only the null case is checked. A non-null `ptr` must still point to a readable,
/// NUL-terminated buffer, even though this function is not marked `unsafe`.
pub(crate) fn safe_cstr_to_string(
    ptr: *const c_char,
    field_name: &str,
) -> Result<String, ManifestError> {
    if ptr.is_null() {
        return Err(ManifestError::NullString {
            field: field_name.to_owned(),
        });
    }

    // SAFETY: `ptr` is non-null (checked above). That it addresses a live, NUL-terminated
    // buffer is an assumption about the caller that cannot be verified here.
    let raw = unsafe { CStr::from_ptr(ptr) };

    match raw.to_str() {
        Ok(text) => Ok(text.to_owned()),
        Err(source) => Err(ManifestError::InvalidUtf8 {
            field: field_name.to_owned(),
            source,
        }),
    }
}
/// Copies an optional NUL-terminated C string into an owned `String`.
///
/// A null `ptr` means "absent" and yields `Ok(None)`. `field_name` is only used to label
/// the error.
///
/// # Errors
///
/// `ManifestError::InvalidUtf8` when the bytes before the terminator are not UTF-8.
///
/// Only the null case is checked. A non-null `ptr` must still point to a readable,
/// NUL-terminated buffer, even though this function is not marked `unsafe`.
pub(crate) fn safe_cstr_to_option_string(
    ptr: *const c_char,
    field_name: &str,
) -> Result<Option<String>, ManifestError> {
    if ptr.is_null() {
        return Ok(None);
    }

    // SAFETY: `ptr` is non-null (checked above). That it addresses a live, NUL-terminated
    // buffer is an assumption about the caller that cannot be verified here.
    let raw = unsafe { CStr::from_ptr(ptr) };

    let text = raw.to_str().map_err(|source| ManifestError::InvalidUtf8 {
        field: field_name.to_owned(),
        source,
    })?;
    Ok(Some(text.to_owned()))
}
/// Turns a raw pointer into a shared reference after checking it is non-null and aligned.
///
/// `field_name` labels the error.
///
/// # Errors
///
/// * `ManifestError::NullPointer` when `ptr` is null.
/// * `ManifestError::MisalignedPointer` when the address is not a multiple of
///   `align_of::<T>()`.
///
/// # Safety
///
/// Null and alignment are the only properties checked. The caller must guarantee that a
/// non-null, aligned `ptr` refers to a valid `T` that stays alive and unmodified for the
/// caller-chosen lifetime `'a`.
pub(crate) unsafe fn validate_ptr<'a, T>(
    ptr: *const T,
    field_name: &'static str,
) -> Result<&'a T, ManifestError> {
    if ptr.is_null() {
        return Err(ManifestError::NullPointer { field: field_name });
    }

    let required_alignment = std::mem::align_of::<T>();
    let address = ptr as usize;
    if address % required_alignment == 0 {
        // SAFETY: non-null and aligned were verified above; validity of the pointee is
        // the caller's obligation per this function's contract.
        Ok(&*ptr)
    } else {
        Err(ManifestError::MisalignedPointer { field: field_name })
    }
}
/// Turns a raw pointer plus element count into a shared slice after basic validation.
///
/// The checks run in this order: the count cap, null handling, then alignment. A null
/// pointer with a zero count is accepted and yields an empty slice. `field_name` is used
/// only in the misalignment error.
///
/// # Errors
///
/// * `ManifestError::TooManyTasks` when `count` exceeds `MAX_TASKS`.
/// * `ManifestError::NullTaskSlice` when `ptr` is null but `count` is non-zero.
/// * `ManifestError::MisalignedPointer` when the address is not a multiple of
///   `align_of::<T>()`.
///
/// # Safety
///
/// The caller must guarantee that a non-null, aligned `ptr` addresses `count` consecutive
/// valid `T` values that stay alive and unmodified for the caller-chosen lifetime `'a`.
pub(crate) unsafe fn validate_slice<'a, T>(
    ptr: *const T,
    count: usize,
    field_name: &'static str,
) -> Result<&'a [T], ManifestError> {
    if count > MAX_TASKS {
        return Err(ManifestError::TooManyTasks {
            count,
            max: MAX_TASKS,
        });
    }

    if ptr.is_null() {
        // Null is only acceptable when there is nothing to read.
        return if count == 0 {
            Ok(&[])
        } else {
            Err(ManifestError::NullTaskSlice { count })
        };
    }

    let misaligned = (ptr as usize) % std::mem::align_of::<T>() != 0;
    if misaligned {
        return Err(ManifestError::MisalignedPointer { field: field_name });
    }

    // SAFETY: `ptr` is non-null and aligned, and `count` is capped by `MAX_TASKS`; that
    // the memory really holds `count` valid elements is the caller's obligation.
    Ok(std::slice::from_raw_parts(ptr, count))
}
pub fn generate_manifest(
cargo_toml: &CargoToml,
so_path: &Path,
target: &Option<String>,
project_path: &Path,
) -> Result<PackageManifest> {
let package = cargo_toml
.package
.as_ref()
.ok_or_else(|| anyhow::anyhow!("Missing package section in Cargo.toml"))?;
let architecture = if let Some(target_triple) = target {
target_triple.clone()
} else {
get_current_architecture()
};
let library_filename = so_path
.file_name()
.ok_or_else(|| anyhow::anyhow!("Invalid so_path"))?
.to_string_lossy()
.to_string();
let (tasks, graph_data, package_metadata) =
extract_task_info_and_graph_from_library(so_path, project_path)?;
let manifest = PackageManifest {
package: PackageInfo {
name: package.name.clone(),
version: package.version.clone(),
description: package_metadata
.description
.unwrap_or_else(|| format!("Packaged workflow: {}", package.name)),
author: package_metadata.author,
workflow_fingerprint: package_metadata.workflow_fingerprint,
cloacina_version: CLOACINA_VERSION.to_string(),
},
library: LibraryInfo {
filename: library_filename,
symbols: vec![EXECUTE_TASK_SYMBOL.to_string()],
architecture,
},
tasks,
graph: graph_data,
};
Ok(manifest)
}
/// Optional package-level metadata read from a compiled workflow library.
///
/// Each field is `None` when the library supplied a null pointer for it (or when the
/// library returned no metadata at all).
#[derive(Debug, Clone)]
pub(crate) struct PackageMetadata {
/// Package description reported by the library, if any.
pub description: Option<String>,
/// Package author reported by the library, if any.
pub author: Option<String>,
/// Workflow fingerprint reported by the library, if any.
pub workflow_fingerprint: Option<String>,
}
fn extract_task_info_and_graph_from_library(
so_path: &Path,
project_path: &Path,
) -> Result<(
Vec<TaskInfo>,
Option<crate::WorkflowGraphData>,
PackageMetadata,
)> {
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct CTaskMetadata {
index: u32,
local_id: *const std::os::raw::c_char,
namespaced_id_template: *const std::os::raw::c_char,
dependencies_json: *const std::os::raw::c_char,
description: *const std::os::raw::c_char,
source_location: *const std::os::raw::c_char,
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct CPackageTasks {
task_count: u32,
tasks: *const CTaskMetadata,
package_name: *const std::os::raw::c_char,
package_description: *const std::os::raw::c_char,
package_author: *const std::os::raw::c_char,
workflow_fingerprint: *const std::os::raw::c_char,
graph_data_json: *const std::os::raw::c_char,
}
let lib = unsafe {
libloading::Library::new(so_path).with_context(|| {
format!(
"Failed to load library for metadata extraction: {:?}",
so_path
)
})?
};
let get_metadata = unsafe {
match lib
.get::<unsafe extern "C" fn() -> *const CPackageTasks>(b"cloacina_get_task_metadata")
{
Ok(func) => func,
Err(_) => {
let cargo_toml_path = project_path.join("Cargo.toml");
let _cargo_content = std::fs::read_to_string(&cargo_toml_path)
.context("Failed to read Cargo.toml for package name extraction")?;
let package_names = extract_package_names_from_source(project_path)?;
let mut found_func = None;
for package_name in package_names {
let normalized_name = package_name
.replace("-", "_")
.replace(" ", "_")
.to_lowercase();
let func_name = format!("cloacina_get_task_metadata_{}\0", normalized_name);
if let Ok(func) = lib
.get::<unsafe extern "C" fn() -> *const CPackageTasks>(func_name.as_bytes())
{
found_func = Some(func);
break;
}
}
found_func
.ok_or_else(|| anyhow::anyhow!("No task metadata function found in library"))?
}
}
};
let package_tasks_ptr = unsafe { get_metadata() };
if package_tasks_ptr.is_null() {
return Ok((
vec![],
None,
PackageMetadata {
description: None,
author: None,
workflow_fingerprint: None,
},
));
}
let package_tasks = unsafe { validate_ptr(package_tasks_ptr, "package_tasks") }
.map_err(|e| anyhow::anyhow!("{}", e))?;
let graph_data = if !package_tasks.graph_data_json.is_null() {
let graph_json_str = safe_cstr_to_string(package_tasks.graph_data_json, "graph_data_json")
.map_err(|e| anyhow::anyhow!("{}", e))?;
let graph = serde_json::from_str::<crate::WorkflowGraphData>(&graph_json_str)
.map_err(|e| ManifestError::InvalidGraphData { source: e })
.map_err(|e| anyhow::anyhow!("{}", e))?;
Some(graph)
} else {
None
};
let mut tasks = Vec::new();
let task_count = package_tasks.task_count as usize;
let tasks_slice = unsafe { validate_slice(package_tasks.tasks, task_count, "tasks") }
.map_err(|e| anyhow::anyhow!("{}", e))?;
for (index, task_metadata) in tasks_slice.iter().enumerate() {
let local_id = safe_cstr_to_string(task_metadata.local_id, "local_id")
.map_err(|e| anyhow::anyhow!("Task {}: {}", index, e))?;
let description = safe_cstr_to_string(task_metadata.description, "description")
.map_err(|e| anyhow::anyhow!("Task {}: {}", index, e))?;
let source_location = safe_cstr_to_string(task_metadata.source_location, "source_location")
.map_err(|e| anyhow::anyhow!("Task {}: {}", index, e))?;
let dependencies_json =
safe_cstr_to_string(task_metadata.dependencies_json, "dependencies_json")
.map_err(|e| anyhow::anyhow!("Task {}: {}", index, e))?;
let dependencies: Vec<String> = serde_json::from_str(&dependencies_json)
.map_err(|e| ManifestError::InvalidDependencies {
task_id: local_id.clone(),
source: e,
})
.map_err(|e| anyhow::anyhow!("{}", e))?;
tasks.push(TaskInfo {
index: index as u32,
id: local_id,
dependencies,
description,
source_location,
});
}
let package_description =
safe_cstr_to_option_string(package_tasks.package_description, "package_description")
.map_err(|e| anyhow::anyhow!("{}", e))?;
let package_author = safe_cstr_to_option_string(package_tasks.package_author, "package_author")
.map_err(|e| anyhow::anyhow!("{}", e))?;
let workflow_fingerprint =
safe_cstr_to_option_string(package_tasks.workflow_fingerprint, "workflow_fingerprint")
.map_err(|e| anyhow::anyhow!("{}", e))?;
let package_metadata = PackageMetadata {
description: package_description,
author: package_author,
workflow_fingerprint,
};
Ok((tasks, graph_data, package_metadata))
}
/// Collects the `package = "..."` names declared in `#[packaged_workflow(...)]`
/// attributes of the project's Rust sources.
///
/// Only files with the `rs` extension that sit directly in `<project_path>/src` are
/// scanned; subdirectories are not descended into. Names are returned in the order the
/// directory listing and the matches within each file produce them, duplicates included.
///
/// # Errors
///
/// Fails when the `src` directory cannot be listed, when a directory entry cannot be
/// read, or when a `.rs` file cannot be read as UTF-8 text.
pub(crate) fn extract_package_names_from_source(project_path: &Path) -> Result<Vec<String>> {
    let src_dir = project_path.join("src");
    let dir_entries = std::fs::read_dir(&src_dir)
        .with_context(|| format!("Failed to read src directory: {:?}", src_dir))?;

    let mut found = Vec::new();
    for dir_entry in dir_entries {
        let file_path = dir_entry?.path();

        let is_rust_source = file_path.extension().and_then(|ext| ext.to_str()) == Some("rs");
        if !is_rust_source {
            continue;
        }

        let source = std::fs::read_to_string(&file_path)
            .with_context(|| format!("Failed to read file: {:?}", file_path))?;

        // Capture group 1 of the pattern is the quoted package name.
        found.extend(
            PACKAGED_WORKFLOW_REGEX
                .captures_iter(&source)
                .filter_map(|caps| caps.get(1))
                .map(|name| name.as_str().to_string()),
        );
    }

    Ok(found)
}
/// Returns the CPU architecture this crate was compiled for, as reported by
/// `std::env::consts::ARCH`.
pub(crate) fn get_current_architecture() -> String {
    String::from(std::env::consts::ARCH)
}