use std::path::{Path, PathBuf};
use crate::error::Error;
use crate::util;
/// Result of materializing an overlay manifest next to an upstream `Cargo.toml`.
#[derive(Debug)]
pub struct OverlayPlan {
/// Path of the upstream manifest that was read (the path handed to the
/// `materialize_overlay*` entry point).
pub upstream_manifest: PathBuf,
/// Path of the generated `Cargo.lihaaf.toml`, located in the same directory
/// as the upstream manifest.
pub sibling_manifest: PathBuf,
/// `true` when the upstream `[lib] crate-type` array already contained
/// `"dylib"` before the overlay rewrote it.
pub upstream_already_has_dylib: bool,
/// Trimmed bodies (without the leading `#`) of every comment found in the
/// raw upstream manifest text, in file order.
pub dropped_comments: Vec<String>,
/// Value of `[package] name` in the upstream manifest, when it is present
/// and a non-empty string.
pub upstream_crate_name: Option<String>,
}
/// Values written into the overlay's `[package.metadata.lihaaf]` table.
#[derive(Debug, Clone)]
pub struct SyntheticMetadata {
/// Emitted as the string key `dylib_crate`.
pub dylib_crate: String,
/// Emitted as the string array `extern_crates`.
pub extern_crates: Vec<String>,
/// Emitted as the string array `fixture_dirs`.
pub fixture_dirs: Vec<String>,
}
pub fn materialize_overlay(upstream_manifest_path: &Path) -> Result<OverlayPlan, Error> {
materialize_overlay_with_metadata(upstream_manifest_path, None)
}
/// Materializes the overlay manifest and, when `synthetic_metadata` is
/// `Some`, injects a clone of it as `[package.metadata.lihaaf]`.
///
/// # Errors
/// Propagates every error produced while reading, parsing, rewriting, or
/// writing the manifest.
pub fn materialize_overlay_with_metadata(
    upstream_manifest_path: &Path,
    synthetic_metadata: Option<&SyntheticMetadata>,
) -> Result<OverlayPlan, Error> {
    materialize_overlay_inner(upstream_manifest_path, |_crate_name| {
        // The upstream crate name is irrelevant here: the caller already
        // decided what (if anything) to inject.
        synthetic_metadata.cloned()
    })
}
/// Materializes the overlay manifest, always injecting metadata produced by
/// `builder`.
///
/// `builder` is called once, after the upstream manifest has been parsed,
/// with the upstream `[package] name` (or `None` when it is missing or empty).
///
/// # Errors
/// Propagates every error produced while reading, parsing, rewriting, or
/// writing the manifest.
pub fn materialize_overlay_with_synthetic_metadata_builder<F>(
    upstream_manifest_path: &Path,
    builder: F,
) -> Result<OverlayPlan, Error>
where
    F: FnOnce(Option<&str>) -> SyntheticMetadata,
{
    materialize_overlay_inner(upstream_manifest_path, move |crate_name| {
        Some(builder(crate_name))
    })
}
/// Shared implementation behind the `materialize_overlay*` entry points.
///
/// Steps, in order:
/// 1. Read the upstream manifest and require it to be valid UTF-8.
/// 2. Collect every comment in the raw text via `scan_dropped_comments`.
/// 3. Parse the text as TOML and reject workspace-root manifests.
/// 4. Record whether upstream already listed `dylib` and what the crate is
///    called, then call `synthetic_metadata` with that crate name.
/// 5. Canonicalize `[lib] crate-type` (creating `[lib]` if absent) and inject
///    the synthetic metadata when one was returned.
/// 6. Serialize the result and write it to `Cargo.lihaaf.toml` beside the
///    upstream manifest, skipping the write when the file on disk already
///    holds identical bytes.
///
/// # Errors
/// Returns an I/O error for read/decode/compare failures, `Error::TomlParse`
/// for malformed TOML or a non-table `lib`, `Error::Cli` for a workspace-root
/// manifest, and whatever the canonicalize/serialize/write helpers return.
fn materialize_overlay_inner<F>(
    upstream_manifest_path: &Path,
    synthetic_metadata: F,
) -> Result<OverlayPlan, Error>
where
    F: FnOnce(Option<&str>) -> Option<SyntheticMetadata>,
{
    let raw_text = std::fs::read(upstream_manifest_path)
        .map_err(|source| {
            Error::io(
                source,
                "reading upstream Cargo.toml for overlay",
                Some(upstream_manifest_path.to_path_buf()),
            )
        })
        .and_then(|bytes| {
            String::from_utf8(bytes).map_err(|source| {
                Error::io(
                    std::io::Error::new(std::io::ErrorKind::InvalidData, source),
                    "decoding upstream Cargo.toml as UTF-8",
                    Some(upstream_manifest_path.to_path_buf()),
                )
            })
        })?;

    // Comments are gathered from the raw text, before parsing.
    let dropped_comments = scan_dropped_comments(&raw_text);

    let mut manifest = match toml::from_str::<toml::Value>(&raw_text) {
        Ok(parsed) => parsed,
        Err(parse_err) => {
            return Err(Error::TomlParse {
                path: upstream_manifest_path.to_path_buf(),
                message: parse_err.to_string(),
            });
        }
    };

    if is_workspace_root_manifest(&manifest) {
        let message = format!(
            "error: `--compat-root` must point to a single-crate Cargo.toml; \
             `{}` is a workspace root (declares `[workspace]` without `[package]`). \
             Pass a member crate's Cargo.toml as `--compat-root` instead.",
            upstream_manifest_path.display()
        );
        return Err(Error::Cli {
            clap_exit_code: 2,
            message,
        });
    }

    // Both facts describe the manifest as upstream wrote it, so they are
    // captured before any mutation below.
    let upstream_already_has_dylib = inspect_existing_crate_type(&manifest);
    let upstream_crate_name = read_upstream_crate_name(&manifest);
    let synthetic = synthetic_metadata(upstream_crate_name.as_deref());

    if let Some(top) = manifest.as_table_mut() {
        let lib_entry = top
            .entry("lib".to_string())
            .or_insert_with(|| toml::Value::Table(toml::map::Map::new()));
        match lib_entry {
            toml::Value::Table(lib) => canonicalize_crate_type(lib)?,
            _ => {
                return Err(Error::TomlParse {
                    path: upstream_manifest_path.to_path_buf(),
                    message: "`[lib]` must be a table, not an inline value".to_string(),
                });
            }
        }
        if let Some(meta) = &synthetic {
            inject_synthetic_metadata(top, meta);
        }
    }

    let serialized = serialize_canonical(&manifest)?;
    let sibling_path = upstream_manifest_path.with_file_name("Cargo.lihaaf.toml");

    // Leave the sibling untouched when it already holds these exact bytes.
    let already_current = match std::fs::read(&sibling_path) {
        Ok(on_disk) => on_disk == serialized,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => false,
        Err(err) => {
            return Err(Error::io(
                err,
                "checking existing Cargo.lihaaf.toml for idempotent rerun",
                Some(sibling_path.clone()),
            ));
        }
    };
    if !already_current {
        util::write_file_atomic(&sibling_path, &serialized)?;
    }

    Ok(OverlayPlan {
        upstream_manifest: upstream_manifest_path.to_path_buf(),
        sibling_manifest: sibling_path,
        upstream_already_has_dylib,
        dropped_comments,
        upstream_crate_name,
    })
}
/// Returns `[package] name` from a parsed manifest.
///
/// Yields `None` when `package` or `name` is missing, when `name` is not a
/// string, or when it is the empty string.
fn read_upstream_crate_name(value: &toml::Value) -> Option<String> {
    let name = value.get("package")?.get("name")?.as_str()?;
    if name.is_empty() {
        None
    } else {
        Some(name.to_owned())
    }
}
/// Writes `meta` into `top` as `[package.metadata.lihaaf]`.
///
/// Missing `package` / `metadata` tables are created. If either key exists
/// but is not a table, the function returns without changing anything
/// further. Any existing `lihaaf` entry under `package.metadata` is replaced.
fn inject_synthetic_metadata(
    top: &mut toml::map::Map<String, toml::Value>,
    meta: &SyntheticMetadata,
) {
    let empty_table = || toml::Value::Table(toml::map::Map::new());
    let string_array = |items: &[String]| {
        toml::Value::Array(
            items
                .iter()
                .map(|item| toml::Value::String(item.clone()))
                .collect(),
        )
    };

    let Some(package) = top
        .entry("package".to_string())
        .or_insert_with(empty_table)
        .as_table_mut()
    else {
        return;
    };
    let Some(metadata) = package
        .entry("metadata".to_string())
        .or_insert_with(empty_table)
        .as_table_mut()
    else {
        return;
    };

    let mut lihaaf = toml::map::Map::new();
    lihaaf.insert(
        "dylib_crate".to_string(),
        toml::Value::String(meta.dylib_crate.clone()),
    );
    lihaaf.insert(
        "extern_crates".to_string(),
        string_array(&meta.extern_crates),
    );
    lihaaf.insert("fixture_dirs".to_string(), string_array(&meta.fixture_dirs));

    metadata.insert("lihaaf".to_string(), toml::Value::Table(lihaaf));
}
/// Reports whether `value` declares a `[workspace]` table but no `[package]`
/// table. A non-table `value` is never a workspace root.
fn is_workspace_root_manifest(value: &toml::Value) -> bool {
    match value.as_table() {
        None => false,
        Some(top) => {
            let declares_table = |key: &str| top.get(key).is_some_and(|entry| entry.is_table());
            declares_table("workspace") && !declares_table("package")
        }
    }
}
/// Reports whether `lib.crate-type` in `value` is an array containing the
/// string `"dylib"`.
///
/// Any missing key, or a `crate-type` that is not an array, yields `false`;
/// non-string array elements are ignored.
fn inspect_existing_crate_type(value: &toml::Value) -> bool {
    value
        .get("lib")
        .and_then(|lib| lib.get("crate-type"))
        .and_then(|crate_type| crate_type.as_array())
        .is_some_and(|entries| entries.iter().any(|entry| entry.as_str() == Some("dylib")))
}
pub(crate) fn canonicalize_crate_type(
table: &mut toml::map::Map<String, toml::Value>,
) -> Result<(), Error> {
let existing: Vec<String> = match table.get("crate-type") {
None => Vec::new(),
Some(toml::Value::Array(arr)) => {
let mut out = Vec::with_capacity(arr.len());
for (idx, v) in arr.iter().enumerate() {
match v.as_str() {
Some(s) => out.push(s.to_string()),
None => {
return Err(Error::TomlParse {
path: PathBuf::from("<overlay>"),
message: format!(
"`[lib] crate-type` element at index {idx} is not a string; \
the overlay accepts only string crate-type entries"
),
});
}
}
}
out
}
Some(other) => {
return Err(Error::TomlParse {
path: PathBuf::from("<overlay>"),
message: format!(
"`[lib] crate-type` must be an array of strings, got `{}`",
type_name_of(other)
),
});
}
};
let mut out: Vec<String> = Vec::with_capacity(existing.len() + 2);
out.push("dylib".to_string());
out.push("rlib".to_string());
for entry in &existing {
if entry == "dylib" || entry == "rlib" {
continue;
}
if !out.contains(entry) {
out.push(entry.clone());
}
}
let array = out.into_iter().map(toml::Value::String).collect::<Vec<_>>();
table.insert("crate-type".to_string(), toml::Value::Array(array));
Ok(())
}
/// Top-level manifest keys in the order the overlay serializer emits them.
/// Keys not listed here are handled separately by the serializer.
pub(crate) fn canonical_key_order() -> &'static [&'static str] {
    const ORDER: &[&str] = &[
        "package",
        "lib",
        "bin",
        "example",
        "test",
        "bench",
        "dependencies",
        "dev-dependencies",
        "build-dependencies",
        "target",
        "features",
        "patch",
        "replace",
        "profile",
        "workspace",
    ];
    ORDER
}
/// Serializes a manifest document to bytes with a deterministic key order.
///
/// Top-level keys are ordered as: the keys named by `canonical_key_order`
/// (in that order), then every remaining key sorted lexicographically. Within
/// that ordering, keys whose value renders as a plain `key = value` line are
/// emitted before keys that render under a `[key]` / `[[key]]` header.
///
/// The second rule matters because each key is serialized on its own and the
/// pieces are concatenated: in TOML a `key = value` line that follows a table
/// header belongs to that table, so a top-level scalar or plain array (for
/// example `cargo-features = [...]`) emitted after `[package]` would silently
/// be re-parented. For manifests whose top-level keys are all tables the
/// output is unaffected by this rule.
///
/// The concatenated text is finally normalized by `post_process_output`.
///
/// # Errors
/// Returns `Error::TomlParse` (with the placeholder path `<overlay>`) when
/// `value` is not a table or when the `toml` serializer rejects an entry.
pub(crate) fn serialize_canonical(value: &toml::Value) -> Result<Vec<u8>, Error> {
    /// True when `toml` renders `v` under a header instead of as `key = value`:
    /// tables, and non-empty arrays made up entirely of tables.
    fn renders_with_header(v: &toml::Value) -> bool {
        match v {
            toml::Value::Table(_) => true,
            toml::Value::Array(items) => {
                !items.is_empty() && items.iter().all(|item| item.is_table())
            }
            _ => false,
        }
    }

    let top = match value {
        toml::Value::Table(t) => t,
        other => {
            return Err(Error::TomlParse {
                path: PathBuf::from("<overlay>"),
                message: format!(
                    "overlay serializer expected a TOML document (table) at the top level, got `{}`",
                    type_name_of(other)
                ),
            });
        }
    };

    // Canonical keys first, in their fixed order.
    let canonical = canonical_key_order();
    let mut ordered: Vec<(&str, &toml::Value)> = Vec::with_capacity(top.len());
    for key in canonical {
        if let Some(v) = top.get(*key) {
            ordered.push((*key, v));
        }
    }

    // Everything else follows, sorted so the output does not depend on the
    // map's iteration order.
    let mut leftovers: Vec<(&str, &toml::Value)> = top
        .iter()
        .map(|(k, v)| (k.as_str(), v))
        .filter(|(k, _)| !canonical.contains(k))
        .collect();
    leftovers.sort_unstable_by(|a, b| a.0.cmp(b.0));
    ordered.extend(leftovers);

    // Stable split: plain `key = value` entries must precede every header,
    // otherwise they would be parsed as members of the preceding table.
    let (inline, headed): (Vec<_>, Vec<_>) = ordered
        .into_iter()
        .partition(|(_, v)| !renders_with_header(v));

    let mut segments: Vec<String> = Vec::with_capacity(top.len());
    for (key, v) in inline.into_iter().chain(headed) {
        // Serialize each key through a one-entry wrapper table so the emitted
        // order is ours rather than the serializer's.
        let mut wrapper = toml::map::Map::new();
        wrapper.insert(key.to_string(), v.clone());
        let segment =
            toml::ser::to_string(&toml::Value::Table(wrapper)).map_err(|e: toml::ser::Error| {
                Error::TomlParse {
                    path: PathBuf::from("<overlay>"),
                    message: format!("overlay serializer failed for `{key}`: {e}"),
                }
            })?;
        segments.push(segment);
    }

    let joined = segments.join("\n");
    Ok(post_process_output(&joined).into_bytes())
}
/// Normalizes serialized text line by line.
///
/// * Trailing spaces, tabs, and carriage returns are removed from each line.
/// * Runs of consecutive blank lines collapse to a single blank line.
/// * The result ends with exactly one `\n` (an empty input yields `"\n"`).
///
/// NOTE(review): this works on raw lines with no TOML awareness, so lines
/// inside a multi-line string literal are normalized like any other line.
fn post_process_output(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut last_was_blank = false;

    for raw in input.lines() {
        let line = raw.trim_end_matches(|c: char| matches!(c, ' ' | '\t' | '\r'));
        let blank = line.is_empty();
        if !(blank && last_was_blank) {
            result.push_str(line);
            result.push('\n');
            last_was_blank = blank;
        }
    }

    // Drop every trailing newline, then put exactly one back.
    let kept = result.trim_end_matches('\n').len();
    result.truncate(kept);
    result.push('\n');
    result
}
/// Collects the body of every `#` comment in raw TOML text.
///
/// The scanner walks the bytes once, tracking whether it is inside a basic
/// (`"…"`), literal (`'…'`), multi-line basic (`"""…"""`), or multi-line
/// literal (`'''…'''`) string so that a `#` inside a string is not mistaken
/// for a comment. Each returned entry is the text after the `#` up to the end
/// of the line, with surrounding whitespace trimmed, in file order.
///
/// Details:
/// * Inside basic and multi-line basic strings a backslash skips the next byte.
/// * A newline ends a single-line string even without a closing quote, so one
///   malformed line cannot swallow the rest of the file.
/// * TOML permits one or two quote characters directly before the closing
///   delimiter of a multi-line string (e.g. `"""say "hi""""`). After a closing
///   delimiter, up to two further identical quotes are therefore consumed as
///   part of the same string instead of being treated as the start of a new one.
fn scan_dropped_comments(text: &str) -> Vec<String> {
    #[derive(Clone, Copy)]
    enum State {
        Plain,
        Basic,
        Literal,
        MultiBasic,
        MultiLiteral,
    }

    /// True when `bytes[i..]` begins with three copies of `quote`.
    fn is_triple(bytes: &[u8], i: usize, quote: u8) -> bool {
        bytes[i..].starts_with(&[quote, quote, quote])
    }

    /// Index just past a closing triple delimiter at `i`, also skipping up to
    /// two extra `quote` bytes that belong to the same run.
    fn after_closing(bytes: &[u8], i: usize, quote: u8) -> usize {
        let mut next = i + 3;
        let limit = (next + 2).min(bytes.len());
        while next < limit && bytes[next] == quote {
            next += 1;
        }
        next
    }

    let bytes = text.as_bytes();
    let mut comments: Vec<String> = Vec::new();
    let mut state = State::Plain;
    let mut i = 0usize;

    while i < bytes.len() {
        let b = bytes[i];
        match state {
            State::MultiBasic => {
                if b == b'\\' && i + 1 < bytes.len() {
                    i += 2;
                } else if is_triple(bytes, i, b'"') {
                    state = State::Plain;
                    i = after_closing(bytes, i, b'"');
                } else {
                    i += 1;
                }
            }
            State::MultiLiteral => {
                if is_triple(bytes, i, b'\'') {
                    state = State::Plain;
                    i = after_closing(bytes, i, b'\'');
                } else {
                    i += 1;
                }
            }
            State::Basic => {
                if b == b'\\' && i + 1 < bytes.len() {
                    i += 2;
                } else {
                    if b == b'"' || b == b'\n' {
                        state = State::Plain;
                    }
                    i += 1;
                }
            }
            State::Literal => {
                if b == b'\'' || b == b'\n' {
                    state = State::Plain;
                }
                i += 1;
            }
            State::Plain => match b {
                b'#' => {
                    // `#` and `\n` are ASCII, so both slice bounds fall on
                    // UTF-8 character boundaries.
                    let start = i + 1;
                    let end = bytes[start..]
                        .iter()
                        .position(|&c| c == b'\n')
                        .map_or(bytes.len(), |offset| start + offset);
                    comments.push(text[start..end].trim().to_string());
                    i = end;
                }
                b'"' => {
                    if is_triple(bytes, i, b'"') {
                        state = State::MultiBasic;
                        i += 3;
                    } else {
                        state = State::Basic;
                        i += 1;
                    }
                }
                b'\'' => {
                    if is_triple(bytes, i, b'\'') {
                        state = State::MultiLiteral;
                        i += 3;
                    } else {
                        state = State::Literal;
                        i += 1;
                    }
                }
                _ => i += 1,
            },
        }
    }
    comments
}
/// Test-only helper: returns the first comment `scan_dropped_comments` finds
/// in `line`, or `None` when there is none.
#[cfg(test)]
fn extract_unquoted_comment(line: &str) -> Option<String> {
    scan_dropped_comments(line).into_iter().next()
}
fn type_name_of(v: &toml::Value) -> &'static str {
match v {
toml::Value::String(_) => "string",
toml::Value::Integer(_) => "integer",
toml::Value::Float(_) => "float",
toml::Value::Boolean(_) => "boolean",
toml::Value::Datetime(_) => "datetime",
toml::Value::Array(_) => "array",
toml::Value::Table(_) => "table",
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Seeds a `[lib]` table with `initial` as its `crate-type` array, runs
    /// `canonicalize_crate_type`, and returns the resulting string entries.
    fn crate_types_after(initial: &[&str]) -> Vec<String> {
        let seeded: Vec<toml::Value> = initial
            .iter()
            .map(|kind| toml::Value::String((*kind).into()))
            .collect();
        let mut lib = toml::map::Map::new();
        lib.insert("crate-type".into(), toml::Value::Array(seeded));
        canonicalize_crate_type(&mut lib).unwrap();
        lib.get("crate-type")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .map(str::to_owned)
            .collect()
    }

    #[test]
    fn canonicalize_inserts_dylib_rlib_when_absent() {
        // No `crate-type` key at all, so the shared helper is not used here.
        let mut lib = toml::map::Map::new();
        canonicalize_crate_type(&mut lib).unwrap();
        let kinds: Vec<&str> = lib
            .get("crate-type")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        assert_eq!(kinds, vec!["dylib", "rlib"]);
    }

    #[test]
    fn canonicalize_prepends_dylib_to_rlib_only() {
        assert_eq!(crate_types_after(&["rlib"]), vec!["dylib", "rlib"]);
    }

    #[test]
    fn canonicalize_appends_rlib_when_only_dylib() {
        assert_eq!(crate_types_after(&["dylib"]), vec!["dylib", "rlib"]);
    }

    #[test]
    fn canonicalize_preserves_cdylib_after_pair() {
        assert_eq!(
            crate_types_after(&["cdylib"]),
            vec!["dylib", "rlib", "cdylib"]
        );
    }

    #[test]
    fn canonicalize_dedups_duplicates() {
        assert_eq!(
            crate_types_after(&["rlib", "dylib", "rlib", "cdylib"]),
            vec!["dylib", "rlib", "cdylib"]
        );
    }

    #[test]
    fn canonicalize_rejects_non_string_element() {
        let mut lib = toml::map::Map::new();
        lib.insert(
            "crate-type".into(),
            toml::Value::Array(vec![toml::Value::Integer(1)]),
        );
        let rendered = format!("{:?}", canonicalize_crate_type(&mut lib).unwrap_err());
        assert!(
            rendered.contains("not a string"),
            "diagnostic must name the failure: {rendered}"
        );
    }

    #[test]
    fn canonical_key_order_starts_with_package() {
        let first = canonical_key_order()[0];
        assert_eq!(first, "package");
    }

    #[test]
    fn extract_unquoted_comment_strips_leading_hash() {
        let found = extract_unquoted_comment("# a leading comment");
        assert_eq!(found, Some("a leading comment".into()));
    }

    #[test]
    fn extract_unquoted_comment_handles_trailing() {
        let found = extract_unquoted_comment(r#"name = "demo" # trailing"#);
        assert_eq!(found, Some("trailing".into()));
    }

    #[test]
    fn extract_unquoted_comment_ignores_hash_inside_string() {
        let found = extract_unquoted_comment(r#"url = "http://example.com/#anchor""#);
        assert_eq!(found, None);
    }

    #[test]
    fn extract_unquoted_comment_ignores_hash_inside_single_quote() {
        let found = extract_unquoted_comment(r#"name = 'foo#bar'"#);
        assert_eq!(found, None);
    }

    #[test]
    fn scan_ignores_hash_inside_multiline_basic_string() {
        let comments =
            scan_dropped_comments("description = \"\"\"\nline with #notacomment\n\"\"\"\n");
        assert!(
            !comments.iter().any(|c| c.contains("notacomment")),
            "multi-line basic string body must not be classified as a comment; got {comments:?}",
        );
    }

    #[test]
    fn scan_ignores_hash_inside_multiline_literal_string() {
        let comments =
            scan_dropped_comments("description = '''\nline with #stillnotacomment\n'''\n");
        assert!(
            !comments.iter().any(|c| c.contains("stillnotacomment")),
            "multi-line literal string body must not be classified as a comment; got {comments:?}",
        );
    }

    #[test]
    fn scan_recognizes_comment_after_multiline_string_closes() {
        let comments =
            scan_dropped_comments("description = \"\"\"\nblock\n\"\"\" # real comment\n");
        assert!(
            comments.iter().any(|c| c == "real comment"),
            "comment AFTER the multi-line string close must be captured; got {comments:?}",
        );
        assert!(
            !comments.iter().any(|c| c.contains("block")),
            "multi-line body must never appear as a comment; got {comments:?}",
        );
    }

    #[test]
    fn scan_basic_string_escape_does_not_strand_state() {
        let comments = scan_dropped_comments("name = \"foo \\\" #notacomment\"\n# real\n");
        assert!(
            comments.iter().all(|c| !c.contains("notacomment")),
            "escaped quote inside basic string must keep scanner in-string; got {comments:?}",
        );
        assert!(
            comments.iter().any(|c| c == "real"),
            "comment on the following line must still be captured; got {comments:?}",
        );
    }

    #[test]
    fn post_process_strips_trailing_whitespace() {
        let out = post_process_output("foo = 1 \nbar = 2\t\n");
        for line in out.lines() {
            assert!(!line.ends_with(' ') && !line.ends_with('\t'));
        }
    }

    #[test]
    fn post_process_strips_cr() {
        let out = post_process_output("foo = 1\r\nbar = 2\r\n");
        assert!(!out.contains('\r'));
    }

    #[test]
    fn post_process_collapses_blank_runs() {
        let out = post_process_output("foo = 1\n\n\n\nbar = 2\n");
        assert_eq!(out, "foo = 1\n\nbar = 2\n");
    }

    #[test]
    fn serialize_canonical_emits_package_first() {
        let input = r#"
[features]
default = []
[dependencies]
serde = "1"
[package]
name = "demo"
version = "0.1.0"
"#;
        let parsed: toml::Value = toml::from_str(input).unwrap();
        let out = String::from_utf8(serialize_canonical(&parsed).unwrap()).unwrap();
        let first_header = out.lines().find(|l| l.starts_with('[')).unwrap();
        assert_eq!(first_header, "[package]", "got:\n{out}");
    }
}