use std::cmp::Ordering;
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::collections::hash_map::VacantEntry;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
use anyhow::Result;
use anyhow::bail;
use ignore::overrides::Override;
#[cfg(test)]
use indexmap::IndexMap;
use once_cell::unsync::OnceCell;
use same_file::is_same_file;
use tempfile::TempDir;
use tempfile::tempdir;
use tracing::debug;
use crate::Metadata24;
use crate::archive_source::ArchiveSource;
use crate::archive_source::FileSourceData;
use crate::archive_source::GeneratedSourceData;
use super::ModuleWriter;
use super::ModuleWriterInternal;
use super::PathWriter;
use super::SDistWriter;
use super::WheelWriter;
#[cfg(test)]
use super::mock_writer::MockWriter;
use super::write_dist_info;
/// A writer that records archive entries in memory, keyed by their target
/// path, and only forwards them to the wrapped writer `W` when it is finished.
pub struct VirtualWriter<W> {
/// The wrapped writer that receives every tracked entry on finish.
inner: W,
/// Entries recorded so far, keyed by their target path inside the archive.
tracker: HashMap<PathBuf, ArchiveSource>,
/// Matcher consulted by `exclude` to decide whether a path is left out.
excludes: Override,
/// Set once the "excluded by target path" deprecation warning has been
/// printed, so that it is only printed a single time.
target_exclusion_warning_emitted: bool,
/// Lazily created temporary directory, shared through `Rc` with callers of
/// `temp_dir()`.
temp_dir: OnceCell<Rc<TempDir>>,
/// Bytes queued by `prepend_to`, keyed by target path; they are applied to
/// the tracked entries right before the entries are written out.
pending_prepends: HashMap<PathBuf, Vec<u8>>,
}
impl<W: ModuleWriterInternal> VirtualWriter<W> {
pub fn new(inner: W, excludes: Override) -> Self {
Self {
inner,
tracker: HashMap::new(),
excludes,
target_exclusion_warning_emitted: false,
temp_dir: OnceCell::new(),
pending_prepends: HashMap::new(),
}
}
/// Returns a shared handle to this writer's temporary directory, creating
/// the directory on the first call.
///
/// # Errors
///
/// Fails when the temporary directory cannot be created.
pub(crate) fn temp_dir(&self) -> Result<Rc<TempDir>> {
    let shared = self
        .temp_dir
        .get_or_try_init(|| -> Result<Rc<TempDir>> { Ok(Rc::new(tempdir()?)) })?;
    Ok(Rc::clone(shared))
}
/// Returns `true` when `path` must be left out of the archive, i.e. when the
/// exclusion matcher reports a whitelist match for it.
///
/// The path is always matched as a file, never as a directory.
pub(crate) fn exclude(&self, path: impl AsRef<Path>) -> bool {
    let candidate = path.as_ref();
    let is_dir = false;
    let verdict = self.excludes.matched(candidate, is_dir);
    verdict.is_whitelist()
}
/// Returns `true` when an entry is already tracked for `target`.
pub(crate) fn contains_target(&self, target: impl AsRef<Path>) -> bool {
    let key = target.as_ref();
    self.tracker.get(key).is_some()
}
fn get_entry(
&mut self,
target: PathBuf,
source: Option<&Path>,
) -> Result<Option<VacantEntry<'_, PathBuf, ArchiveSource>>> {
if let Some(source) = source
&& self.exclude(source)
{
return Ok(None);
}
if self.exclude(&target) {
if !self.target_exclusion_warning_emitted {
self.target_exclusion_warning_emitted = true;
eprintln!(
"⚠️ Warning: A file was excluded from the archive by the target path in the archive\n\
⚠️ instead of the source path on the filesystem. This behavior is deprecated and\n\
⚠️ will be removed in future versions of maturin.",
);
}
debug!("Excluded file {target:?} from archive by target path");
return Ok(None);
}
let entry = match self.tracker.entry(target.clone()) {
Entry::Vacant(entry) => Some(entry),
Entry::Occupied(entry) => {
match (entry.get().path(), source) {
(None, None) => {
bail!(
"Generated file {} was already added, can't add it again",
target.display()
);
}
(Some(previous_source), None) => {
bail!(
"File {} was already added from {}, can't overwrite with generated file",
target.display(),
previous_source.display()
)
}
(None, Some(source)) => {
bail!(
"Generated file {} was already added, can't overwrite it with {}",
target.display(),
source.display()
);
}
(Some(previous_source), Some(source)) => {
if is_same_file(source, previous_source).unwrap_or(false) {
None
} else {
bail!(
"File {} was already added from {}, can't add it from {}",
target.display(),
previous_source.display(),
source.display()
);
}
}
}
}
};
Ok(entry)
}
/// Tracks `source` under `target`, unless the entry is excluded or the same
/// file is already tracked there (both cases are silently skipped).
///
/// # Errors
///
/// Fails when `target` is already tracked from a different origin.
pub(crate) fn add_entry(
    &mut self,
    target: impl AsRef<Path>,
    source: ArchiveSource,
) -> Result<()> {
    let target = target.as_ref();
    let Some(slot) = self.get_entry(target.to_path_buf(), source.path())? else {
        return Ok(());
    };
    debug!("Tracked entry {target:?}");
    slot.insert(source);
    Ok(())
}
/// Tracks the file at `source` under `target` without consulting the
/// exclusion matcher.
///
/// # Errors
///
/// Fails when an entry was already tracked for `target`.
pub(crate) fn add_file_force(
    &mut self,
    target: impl AsRef<Path>,
    source: impl AsRef<Path>,
    executable: bool,
) -> Result<()> {
    let (target, source) = (target.as_ref(), source.as_ref());
    debug!("Adding {} from {}", target.display(), source.display());
    let file = FileSourceData {
        path: source.to_path_buf(),
        executable,
    };
    self.add_entry_force(target, ArchiveSource::File(file))
}
/// Tracks `source` under `target` without consulting the exclusion matcher.
///
/// # Errors
///
/// Fails when an entry was already tracked for `target`. Note that the new
/// entry has replaced the previous one by the time the error is returned.
pub(crate) fn add_entry_force(
    &mut self,
    target: impl AsRef<Path>,
    source: ArchiveSource,
) -> Result<()> {
    let target = target.as_ref();
    debug!("Adding {} (forced)", target.display());
    match self.tracker.insert(target.to_path_buf(), source) {
        None => Ok(()),
        Some(_replaced) => bail!(
            "File {} overwrote an existing tracked file",
            target.display()
        ),
    }
}
/// Queues `data` to be prepended to the entry tracked under `target`.
///
/// Several calls for the same target accumulate in call order. Nothing is
/// modified until the writer is finished. This function currently never
/// returns an error.
pub(crate) fn prepend_to(&mut self, target: impl AsRef<Path>, data: Vec<u8>) -> Result<()> {
    let key = target.as_ref().to_path_buf();
    let queued = self.pending_prepends.entry(key).or_default();
    queued.extend(data);
    Ok(())
}
/// Applies every queued prepend to its tracked entry and clears the queue.
///
/// For each queued target:
/// * If nothing is tracked for the target, the prepend is skipped with a
///   warning instead of creating a new file.
/// * Otherwise the entry's content is taken (file-backed entries are read from
///   disk), the queued bytes are spliced in, and the entry is re-tracked as
///   generated content, keeping its source path and executable flag.
///
/// For targets with a `.py` extension (case-insensitive) the bytes are spliced
/// in at the offset chosen by `find_python_insertion_point`; for everything
/// else they go at the very beginning.
///
/// # Errors
///
/// Fails when a file-backed entry cannot be read.
fn apply_pending_prepends(&mut self) -> Result<()> {
    const UTF8_BOM: &[u8] = b"\xef\xbb\xbf";

    for (target, prepend_data) in std::mem::take(&mut self.pending_prepends) {
        let is_python = target
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"));
        let Some(existing) = self.tracker.remove(&target) else {
            tracing::warn!(
                "Skipping prepend for {} because it was not tracked. \
                 This avoids creating a new file such as __init__.py unexpectedly.",
                target.display()
            );
            continue;
        };
        let (file_content, path, executable) = match existing {
            ArchiveSource::Generated(g) => (g.data, g.path, g.executable),
            ArchiveSource::File(f) => (fs_err::read(&f.path)?, Some(f.path), f.executable),
        };
        let insert_pos = if is_python {
            find_python_insertion_point(&file_content)
        } else {
            0
        };
        let (head, tail) = file_content.split_at(insert_pos);

        // When the text before the insertion point does not end with a newline
        // (typically a file without a trailing newline), the prepended bytes
        // would be glued onto that last line: a trailing comment would swallow
        // the first prepended statement, and a trailing statement would turn
        // into a syntax error. Start the prepended bytes on a fresh line then.
        // A bare BOM is not a line, so it never needs a separator.
        let text_before = head.strip_prefix(UTF8_BOM).unwrap_or(head);
        let needs_newline = text_before.last().is_some_and(|&byte| byte != b'\n');

        let mut new_data = Vec::with_capacity(
            file_content.len() + prepend_data.len() + usize::from(needs_newline),
        );
        new_data.extend_from_slice(head);
        if needs_newline {
            new_data.push(b'\n');
        }
        new_data.extend_from_slice(&prepend_data);
        new_data.extend_from_slice(tail);
        self.tracker.insert(
            target,
            ArchiveSource::Generated(GeneratedSourceData {
                data: new_data,
                path,
                executable,
            }),
        );
    }
    Ok(())
}
/// Applies the queued prepends, then hands every tracked entry to the inner
/// writer in the order defined by `comparator`, and returns the inner writer.
///
/// # Errors
///
/// Fails when a prepend cannot be applied or when the inner writer rejects an
/// entry.
fn finish_internal(
    mut self,
    comparator: &mut impl FnMut(&PathBuf, &PathBuf) -> Ordering,
) -> Result<W> {
    self.apply_pending_prepends()?;

    let mut ordered = Vec::from_iter(self.tracker);
    ordered.sort_unstable_by(|lhs, rhs| comparator(&lhs.0, &rhs.0));

    for (target, source) in ordered {
        self.inner.add_entry(target, source)?;
    }
    Ok(self.inner)
}
}
/// Returns the byte offset in the Python source `content` at which extra
/// statements can be inserted without displacing the module header.
///
/// The header that is skipped consists of, in this order:
/// 1. a UTF-8 byte order mark,
/// 2. leading comment and blank lines (this covers a shebang line and an
///    encoding declaration, which are both comments),
/// 3. a module docstring: a string literal, optionally prefixed with
///    `r`/`R`/`u`/`U`, using triple or single quotes, that is followed by
///    nothing but whitespace or a comment on its closing line,
/// 4. `from __future__` imports, including parenthesised and
///    backslash-continued ones, with comment and blank lines in between.
///
/// The returned offset always lies at the start of a line, or at the end of
/// `content` when the header reaches the end of the input.
fn find_python_insertion_point(content: &[u8]) -> usize {
    const UTF8_BOM: &[u8] = b"\xef\xbb\xbf";
    const FUTURE_IMPORT: &[u8] = b"from __future__";

    /// Offset just past the `\n` ending the line that begins at `start`, or
    /// `content.len()` when that line is not terminated.
    fn line_end(content: &[u8], start: usize) -> usize {
        content[start..]
            .iter()
            .position(|&byte| byte == b'\n')
            .map_or(content.len(), |newline| start + newline + 1)
    }

    /// `text` without its leading ASCII whitespace.
    fn trim_start(text: &[u8]) -> &[u8] {
        let first = text
            .iter()
            .position(|byte| !byte.is_ascii_whitespace())
            .unwrap_or(text.len());
        &text[first..]
    }

    /// Whether `line` holds only whitespace or only a `#` comment.
    fn is_comment_or_blank(line: &[u8]) -> bool {
        let text = trim_start(line);
        text.is_empty() || text.starts_with(b"#")
    }

    /// Whether the last non-whitespace byte of `line` is a backslash.
    fn ends_with_backslash(line: &[u8]) -> bool {
        line.iter()
            .rposition(|byte| !byte.is_ascii_whitespace())
            .is_some_and(|last| line[last] == b'\\')
    }

    /// Parenthesis nesting depth after `line`, starting from `depth`.
    fn paren_depth_after(depth: usize, line: &[u8]) -> usize {
        line.iter().fold(depth, |depth, &byte| match byte {
            b'(' => depth + 1,
            b')' => depth.saturating_sub(1),
            _ => depth,
        })
    }

    /// If a docstring-like string literal starts at `start`, returns the
    /// offset just past the line on which it ends.
    ///
    /// Returns `None` when there is no string literal at `start`, when the
    /// literal is not terminated, or when code follows the closing quote on
    /// the same line (then it is an expression, not a docstring).
    fn docstring_line_end(content: &[u8], start: usize) -> Option<usize> {
        let mut idx = start;
        if matches!(content.get(idx), Some(b'r' | b'R' | b'u' | b'U')) {
            idx += 1;
        }
        let quote = match content.get(idx) {
            Some(&byte @ (b'"' | b'\'')) => byte,
            _ => return None,
        };
        let triple_quote = [quote; 3];
        let triple = content[idx..].starts_with(&triple_quote);
        let delimiter: &[u8] = if triple {
            &triple_quote
        } else {
            &triple_quote[..1]
        };
        idx += delimiter.len();

        let close = loop {
            match *content.get(idx)? {
                // A backslash protects the next byte from ending the literal;
                // this holds for raw strings as well.
                b'\\' => idx += 2,
                // Only triple-quoted literals may span several lines.
                b'\n' if !triple => return None,
                byte if byte == quote && content[idx..].starts_with(delimiter) => {
                    break idx + delimiter.len();
                }
                _ => idx += 1,
            }
        };

        let end = line_end(content, close);
        is_comment_or_blank(&content[close..end]).then_some(end)
    }

    let mut pos = if content.starts_with(UTF8_BOM) {
        UTF8_BOM.len()
    } else {
        0
    };

    // Leading comments and blank lines, including shebang and encoding lines.
    while pos < content.len() {
        let end = line_end(content, pos);
        if !is_comment_or_blank(&content[pos..end]) {
            break;
        }
        pos = end;
    }
    let mut insertion_point = pos;

    // Module docstring, if the first statement is one.
    let statement_start = content.len() - trim_start(&content[pos..]).len();
    if let Some(after_docstring) = docstring_line_end(content, statement_start) {
        pos = after_docstring;
        insertion_point = after_docstring;
    }

    // `from __future__` imports. `open_depth` is `Some(paren depth)` while a
    // multi-line import statement is still unfinished.
    let mut open_depth: Option<usize> = None;
    while pos < content.len() {
        let end = line_end(content, pos);
        let line = &content[pos..end];
        pos = end;

        let depth_before = match open_depth {
            Some(depth) => depth,
            None if trim_start(line).starts_with(FUTURE_IMPORT) => 0,
            // Comments between future imports are skipped, but the insertion
            // point only moves once another future import has been seen.
            None if is_comment_or_blank(line) => continue,
            None => break,
        };
        let depth = paren_depth_after(depth_before, line);
        if depth > 0 || ends_with_backslash(line) {
            open_depth = Some(depth);
        } else {
            open_depth = None;
            insertion_point = end;
        }
    }

    insertion_point
}
// Marks `VirtualWriter` as implementing the parent module's private `Sealed`
// trait. NOTE(review): presumably `ModuleWriter` requires `Sealed` so that it
// cannot be implemented outside this crate - confirm against the trait.
impl<W: ModuleWriterInternal> super::private::Sealed for VirtualWriter<W> {}
/// `ModuleWriter` entry points: both only record the entry in the tracker via
/// `add_entry`; nothing reaches the inner writer here.
impl<W: ModuleWriterInternal> ModuleWriter for VirtualWriter<W> {
    /// Tracks in-memory `data` under `target`. `source`, when given, is kept
    /// as the entry's source path.
    fn add_bytes(
        &mut self,
        target: impl AsRef<Path>,
        source: Option<&Path>,
        data: impl Into<Vec<u8>>,
        executable: bool,
    ) -> Result<()> {
        let generated = GeneratedSourceData {
            data: data.into(),
            path: source.map(Path::to_path_buf),
            executable,
        };
        self.add_entry(target, ArchiveSource::Generated(generated))
    }

    /// Tracks the file at `source` under `target`.
    fn add_file(
        &mut self,
        target: impl AsRef<Path>,
        source: impl AsRef<Path>,
        executable: bool,
    ) -> Result<()> {
        let file = FileSourceData {
            path: source.as_ref().to_path_buf(),
            executable,
        };
        self.add_entry(target, ArchiveSource::File(file))
    }
}
impl VirtualWriter<PathWriter> {
    /// Hands all tracked entries to the path writer, ordered by target path,
    /// and discards the writer afterwards.
    pub fn finish(self) -> Result<()> {
        let mut by_path = |lhs: &PathBuf, rhs: &PathBuf| lhs.cmp(rhs);
        self.finish_internal(&mut by_path).map(drop)
    }
}
impl VirtualWriter<SDistWriter> {
    /// Hands all tracked entries to the sdist writer, in the order given by
    /// its `file_ordering(pkg_info_path)`, then finishes the sdist writer and
    /// returns the path it reports.
    pub fn finish(self, pkg_info_path: &Path) -> Result<PathBuf> {
        // The ordering has to be obtained before `self` is consumed below.
        let mut ordering = self.inner.file_ordering(pkg_info_path);
        let sdist = self.finish_internal(&mut ordering)?;
        Ok(sdist.finish()?)
    }
}
impl VirtualWriter<WheelWriter> {
    /// Writes the dist-info entries through `write_dist_info`, hands all
    /// tracked entries to the wheel writer in the order given by its
    /// `file_ordering(dist_info_dir)`, then finishes the wheel writer and
    /// returns the path it reports.
    pub fn finish(
        mut self,
        metadata24: &Metadata24,
        pyproject_dir: &Path,
        tags: &[String],
    ) -> Result<PathBuf> {
        let dist_info_dir = write_dist_info(&mut self, pyproject_dir, metadata24, tags)?;
        // The ordering has to be obtained before `self` is consumed below.
        let mut ordering = self.inner.file_ordering(&dist_info_dir);
        let wheel = self.finish_internal(&mut ordering)?;
        wheel.finish(&dist_info_dir)
    }
}
#[cfg(test)]
impl VirtualWriter<MockWriter> {
    /// Test helper: hands all tracked entries to the mock writer, ordered by
    /// target path, and returns what the mock writer collected.
    pub fn finish(self) -> Result<IndexMap<PathBuf, Vec<u8>>> {
        let mut by_path = |lhs: &PathBuf, rhs: &PathBuf| lhs.cmp(rhs);
        let mock = self.finish_internal(&mut by_path)?;
        Ok(mock.finish())
    }
}
#[cfg(test)]
mod tests {
use std::path::Path;
use anyhow::Result;
use ignore::overrides::Override;
use ignore::overrides::OverrideBuilder;
use insta::assert_snapshot;
use itertools::Itertools as _;
use tempfile::TempDir;
use crate::ModuleWriter;
use crate::module_writer::mock_writer::MockWriter;
use super::VirtualWriter;
/// Without any exclusion rules, an added file is tracked and the writer can
/// be finished.
#[test]
fn virtual_writer_no_excludes() -> Result<()> {
    let mock = MockWriter::default();
    let mut writer = VirtualWriter::new(mock, Override::empty());
    assert!(writer.tracker.is_empty());

    writer.add_empty_file("test")?;
    assert_eq!(writer.tracker.len(), 1);

    let _written = writer.finish()?;
    Ok(())
}
// Entries whose source path matches an exclusion glob are not tracked, while
// a negated glob (`!test2`) lets a path through again.
#[test]
fn virtual_writer_excludes() -> Result<()> {
const EMPTY: &[u8] = &[];
let tmp_dir = TempDir::new()?;
let mut excludes = OverrideBuilder::new(&tmp_dir);
// Exclude everything starting with "test", except "test2".
excludes.add("test*")?;
excludes.add("!test2")?;
let mut writer = VirtualWriter::new(MockWriter::default(), excludes.build()?);
// Both match "test*" and must be dropped.
writer.add_bytes("test1", Some(Path::new("test1")), EMPTY, true)?;
writer.add_bytes("test3", Some(Path::new("test3")), EMPTY, true)?;
assert!(writer.tracker.is_empty());
// Matches no rule, so it is tracked.
writer.add_bytes("yes", Some(Path::new("yes")), EMPTY, true)?;
assert!(!writer.tracker.is_empty());
// Matches the negated rule, so it is tracked as well.
writer.add_bytes("test2", Some(Path::new("test2")), EMPTY, true)?;
assert_eq!(writer.tracker.len(), 2);
let files = writer.finish()?;
tmp_dir.close()?;
// The written targets, one per line, in the order they were written.
assert_snapshot!(files.keys().map(|p| p.to_string_lossy()).collect_vec().join("\n"), @r"
test2
yes
");
Ok(())
}
/// `add_file` honours the exclusion rules, `add_file_force` ignores them.
#[test]
fn virtual_writer_force_bypasses_excludes() -> Result<()> {
    let tmp_dir = TempDir::new()?;
    let source_file = tmp_dir.path().join("artifact.so");
    fs_err::write(&source_file, b"test artifact")?;

    let mut builder = OverrideBuilder::new(tmp_dir.path());
    builder.add("*.so")?;
    let excludes = builder.build()?;
    let mut writer = VirtualWriter::new(MockWriter::default(), excludes);

    writer.add_file("excluded.so", &source_file, true)?;
    assert!(
        writer.tracker.is_empty(),
        "Regular add_file should be excluded"
    );

    writer.add_file_force("forced.so", &source_file, true)?;
    assert_eq!(
        writer.tracker.len(),
        1,
        "add_file_force should bypass exclusion"
    );

    let files = writer.finish()?;
    let written = |name: &str| files.contains_key(Path::new(name));
    assert!(written("forced.so"));
    assert!(!written("excluded.so"));

    tmp_dir.close()?;
    Ok(())
}
/// Pins the insertion offset computed for a range of Python module headers.
#[test]
fn test_find_python_insertion_point() {
    use super::find_python_insertion_point;

    /// Asserts the offset for one input and names the input on failure.
    fn check(content: &[u8], expected: usize) {
        assert_eq!(
            find_python_insertion_point(content),
            expected,
            "unexpected insertion point for {:?}",
            String::from_utf8_lossy(content)
        );
    }

    // No header at all.
    check(b"import os\n", 0);
    check(b"", 0);

    // Future imports, alone and after a docstring.
    check(b"from __future__ import annotations\nimport os\n", 35);
    check(
        b"from __future__ import annotations\nfrom __future__ import division\nimport os\n",
        67,
    );
    check(
        b"\"\"\"Docstring.\"\"\"\nfrom __future__ import annotations\nimport os\n",
        52,
    );
    check(b"\"\"\"Module docstring.\"\"\"\nimport os\n", 24);

    // Multi-line future imports. The continuation lines are indented by four
    // spaces; the expected offsets depend on that exact width.
    check(
        b"from __future__ import (\n    annotations,\n)\nimport os\n",
        44,
    );
    check(
        b"from __future__ import annotations, \\\n    division\nimport os\n",
        51,
    );

    // UTF-8 byte order mark.
    check(b"\xef\xbb\xbfimport os\n", 3);
    check(
        b"\xef\xbb\xbffrom __future__ import annotations\nimport os\n",
        38,
    );

    // Shebang and encoding declaration.
    check(b"#!/usr/bin/env python\nimport os\n", 22);
    check(
        b"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nimport os\n",
        46,
    );
    check(
        b"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nfrom __future__ import annotations\nimport os\n",
        81,
    );
    check(b"# coding: utf-8\nimport os\n", 16);

    // Docstring flavours.
    check(b"r\"\"\"Raw docstring.\"\"\"\nimport os\n", 22);
    check(b"'''Single quoted docstring.'''\nimport os\n", 31);
    check(b"\"\"\"Multi-line\ndocstring.\"\"\"\nimport os\n", 28);
}
/// Forcing the same target twice is reported as an error.
#[test]
fn virtual_writer_force_detects_duplicates() -> Result<()> {
    let tmp_dir = TempDir::new()?;
    let source_file = tmp_dir.path().join("artifact.so");
    fs_err::write(&source_file, b"test artifact")?;

    let mut writer = VirtualWriter::new(MockWriter::default(), Override::empty());

    writer.add_file_force("target.so", &source_file, true)?;
    assert_eq!(writer.tracker.len(), 1);

    let second_attempt = writer.add_file_force("target.so", &source_file, true);
    assert!(
        second_attempt.is_err(),
        "Duplicate add_file_force should fail"
    );

    tmp_dir.close()?;
    Ok(())
}
}