use crate::mock_command::{CommandChild, RunCommand};
use blake3::Hasher as blake3_Hasher;
use byteorder::{BigEndian, ByteOrder};
use fs::File;
use fs_err as fs;
use object::read::archive::ArchiveFile;
use object::read::macho::{FatArch, MachOFatFile32, MachOFatFile64};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::cell::Cell;
use std::ffi::{OsStr, OsString};
use std::hash::Hasher;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::process::{self, Stdio};
use std::str;
use std::time::Duration;
use std::time::{self, SystemTime};
use crate::errors::*;
/// Base64 engine used throughout the crate: URL-safe alphabet, no padding.
pub const BASE64_URL_SAFE_ENGINE: base64::engine::GeneralPurpose =
    base64::engine::general_purpose::URL_SAFE_NO_PAD;
/// Chunk size (128 KiB) for the scratch buffer used when hashing readers.
pub const HASH_BUFFER_SIZE: usize = 128 * 1024;
/// Streaming BLAKE3 hasher; `finish` renders the digest as a hex string.
#[derive(Clone)]
pub struct Digest {
    inner: blake3_Hasher,
}
impl Digest {
    /// Creates a fresh digest with no input consumed yet.
    pub fn new() -> Digest {
        Digest {
            inner: blake3_Hasher::new(),
        }
    }

    /// Hashes the file at `path` on the blocking pool, returning its hex digest.
    pub async fn file<T>(path: T, pool: &tokio::runtime::Handle) -> Result<String>
    where
        T: AsRef<Path>,
    {
        Self::reader(path.as_ref().to_owned(), pool).await
    }

    /// Synchronously hashes everything `reader` yields and returns the hex digest.
    pub fn reader_sync<R: Read>(reader: R) -> Result<String> {
        let digest = Self::reader_sync_with(reader, |_| {})?;
        Ok(digest.finish())
    }

    /// Synchronously hashes `reader`, calling `each` on every chunk before it
    /// is folded into the digest. Returns the digest for further updates.
    pub fn reader_sync_with<R: Read, F: FnMut(&[u8])>(mut reader: R, mut each: F) -> Result<Self> {
        let mut digest = Digest::new();
        let mut buf = [0u8; HASH_BUFFER_SIZE];
        loop {
            match reader.read(&mut buf[..])? {
                // EOF: the digest is complete.
                0 => break Ok(digest),
                n => {
                    each(&buf[..n]);
                    digest.update(&buf[..n]);
                }
            }
        }
    }

    /// Hashes `reader` while scanning the same stream for `__TIME__`-style macros.
    pub fn reader_sync_time_macros<R: Read>(reader: R) -> Result<(String, TimeMacroFinder)> {
        let mut finder = TimeMacroFinder::new();
        let digest = Self::reader_sync_with(reader, |chunk| finder.find_time_macros(chunk))?;
        Ok((digest.finish(), finder))
    }

    /// Opens and hashes `path` on the blocking thread pool.
    pub async fn reader(path: PathBuf, pool: &tokio::runtime::Handle) -> Result<String> {
        pool.spawn_blocking(move || {
            let file = File::open(&path)
                .with_context(|| format!("Failed to open file for hashing: {:?}", path))?;
            Digest::reader_sync(file)
        })
        .await?
    }

    /// Folds `bytes` into the running hash.
    pub fn update(&mut self, bytes: &[u8]) {
        self.inner.update(bytes);
    }

    /// Inserts a named separator so adjacent inputs cannot collide by
    /// concatenation.
    pub fn delimiter(&mut self, name: &[u8]) {
        self.update(b"\0SCCACHE\0");
        self.update(name);
        self.update(b"\0");
    }

    /// Consumes the digest and returns the final hash as a hex string.
    pub fn finish(self) -> String {
        hex(self.inner.finalize().as_bytes())
    }
}
impl Default for Digest {
fn default() -> Self {
Self::new()
}
}
/// Length of the longest token we search for (`__TIMESTAMP__`); bounds how
/// many bytes must overlap between consecutive chunks.
const MAX_HAYSTACK_LEN: usize = b"__TIMESTAMP__".len();
// Re-exported for tests only.
#[cfg(test)]
pub const MAX_TIME_MACRO_HAYSTACK_LEN: usize = MAX_HAYSTACK_LEN;
/// Incremental scanner that detects `__DATE__`, `__TIME__` and
/// `__TIMESTAMP__` tokens in a byte stream delivered in arbitrary chunks.
#[derive(Debug, Default)]
pub struct TimeMacroFinder {
    // Cells so `find_macros` can latch hits through `&self`.
    found_date: Cell<bool>,
    found_time: Cell<bool>,
    found_timestamp: Cell<bool>,
    /// Window holding the previous chunk's tail and the current chunk's head,
    /// so a token split across a chunk boundary is still found.
    overlap_buffer: [u8; MAX_HAYSTACK_LEN * 2],
    /// Count of chunks processed that were long enough to not be buffered.
    full_chunks_counter: usize,
    /// Accumulates consecutive reads shorter than the longest token.
    previous_small_read: Vec<u8>,
}
impl TimeMacroFinder {
    /// Feeds the next chunk of the stream into the scanner.
    ///
    /// Strategy: chunks shorter than the longest token accumulate in
    /// `previous_small_read`; for longer chunks the trailing
    /// `MAX_HAYSTACK_LEN` bytes are kept in `overlap_buffer` so tokens that
    /// straddle two chunks appear whole in some scanned window.
    pub fn find_time_macros(&mut self, visit: &[u8]) {
        if self.full_chunks_counter == 0 {
            if visit.len() <= MAX_HAYSTACK_LEN {
                // Only small reads so far: accumulate them and scan the whole
                // accumulation.
                if !self.previous_small_read.is_empty() {
                    self.previous_small_read.extend(visit);
                } else {
                    visit.clone_into(&mut self.previous_small_read);
                }
                self.find_macros(&self.previous_small_read);
                return;
            }
            // First full-sized chunk: remember its tail for boundary overlap.
            let right_half = visit.len() - MAX_HAYSTACK_LEN;
            self.overlap_buffer[..MAX_HAYSTACK_LEN].copy_from_slice(&visit[right_half..]);
        } else {
            if visit.len() < MAX_HAYSTACK_LEN {
                // Small read following a full chunk.
                if !self.previous_small_read.is_empty() {
                    self.previous_small_read.extend(visit);
                } else {
                    // Seed the accumulator with the previous chunk's tail so a
                    // token spanning the boundary is visible.
                    let mut buf = self.overlap_buffer[..MAX_HAYSTACK_LEN].to_owned();
                    buf.extend(visit);
                    self.previous_small_read = buf;
                }
                // Rebuild the right half of the overlap window from this read
                // (zero it first since the read is shorter than the half).
                self.overlap_buffer[MAX_HAYSTACK_LEN..].copy_from_slice(&[0; MAX_HAYSTACK_LEN]);
                self.overlap_buffer[MAX_HAYSTACK_LEN..MAX_HAYSTACK_LEN + visit.len()]
                    .copy_from_slice(visit);
                self.find_macros(&self.previous_small_read);
                self.find_macros(&self.overlap_buffer);
                return;
            } else {
                // Full chunk following a full chunk: complete the overlap
                // window with this chunk's head and scan the window...
                let left_half = MAX_HAYSTACK_LEN;
                self.overlap_buffer[left_half..].copy_from_slice(&visit[..left_half]);
                self.find_macros(&self.overlap_buffer);
                self.overlap_buffer = Default::default();
                // ...then stash this chunk's tail for the next boundary.
                let right_half = visit.len() - MAX_HAYSTACK_LEN;
                self.overlap_buffer[..MAX_HAYSTACK_LEN].copy_from_slice(&visit[right_half..]);
            }
            self.find_macros(&self.overlap_buffer);
        }
        if !self.previous_small_read.is_empty() {
            // A pending small read may join up with this chunk's head.
            let mut concatenated = self.previous_small_read.clone();
            concatenated.extend(visit);
            self.find_macros(&concatenated);
        }
        self.find_macros(visit);
        self.full_chunks_counter += 1;
        self.previous_small_read.clear();
    }
    /// Scans one window for the three tokens, latching any hits.
    fn find_macros(&self, buffer: &[u8]) {
        if memchr::memmem::find(buffer, b"__TIMESTAMP__").is_some() {
            self.found_timestamp.set(true);
        }
        if memchr::memmem::find(buffer, b"__TIME__").is_some() {
            self.found_time.set(true);
        }
        if memchr::memmem::find(buffer, b"__DATE__").is_some() {
            self.found_date.set(true);
        }
    }
    /// True if any of the three time macros has been seen so far.
    pub fn found_time_macros(&self) -> bool {
        self.found_date() || self.found_time() || self.found_timestamp()
    }
    pub fn found_time(&self) -> bool {
        self.found_time.get()
    }
    pub fn found_date(&self) -> bool {
        self.found_date.get()
    }
    pub fn found_timestamp(&self) -> bool {
        self.found_timestamp.get()
    }
    pub fn new() -> Self {
        Default::default()
    }
}
/// Renders `bytes` as a lowercase hex string.
///
/// NOTE: the low nibble of each byte is emitted *before* the high nibble, so
/// the output is not conventional big-endian hex. This is the historical
/// format for this crate's digests; changing the order would change every
/// produced digest string (and thus invalidate existing cache entries).
pub fn hex(bytes: &[u8]) -> String {
    // Maps a nibble (0..=15) to its lowercase hex character.
    fn nibble(b: u8) -> char {
        match b {
            0..=9 => (b'0' + b) as char,
            _ => (b'a' + b - 10) as char,
        }
    }
    bytes
        .iter()
        .flat_map(|&b| [nibble(b & 0xf), nibble(b >> 4)])
        .collect()
}
/// Hashes every file in `files` concurrently on the blocking pool, returning
/// one hex digest per input in the same order.
pub async fn hash_all(files: &[PathBuf], pool: &tokio::runtime::Handle) -> Result<Vec<String>> {
    let started = time::Instant::now();
    let total = files.len();
    let pending = files.iter().map(move |f| Digest::file(f, pool));
    let digests = futures::future::try_join_all(pending).await?;
    trace!(
        "Hashed {} files in {}",
        total,
        fmt_duration_as_secs(&started.elapsed())
    );
    Ok(digests)
}
/// Hashes each archive in `files` on the blocking pool, returning one hex
/// digest per input in the same order.
///
/// Mach-O fat (universal) binaries are special-cased: each contained
/// architecture slice is hashed as its own archive, so the digest does not
/// depend on fat-header layout. Anything else is treated as a regular `ar`
/// archive via `hash_regular_archive`.
pub async fn hash_all_archives(
    files: &[PathBuf],
    pool: &tokio::runtime::Handle,
) -> Result<Vec<String>> {
    let start = time::Instant::now();
    let count = files.len();
    let iter = files.iter().map(|path| {
        let path = path.clone();
        pool.spawn_blocking(move || -> Result<String> {
            let mut m = Digest::new();
            let archive_file = File::open(&path)
                .with_context(|| format!("Failed to open file for hashing: {:?}", path))?;
            // Memory-map the file read-only instead of reading it all into a
            // buffer. SAFETY: the map is private/read-only; as with any mmap,
            // concurrent modification of the file by another process could
            // still be observed.
            let archive_mmap =
                unsafe { memmap2::MmapOptions::new().map_copy_read_only(&archive_file)? };
            // Try 32-bit then 64-bit fat headers before falling back to a
            // plain archive.
            if let Ok(fat) = MachOFatFile32::parse(&*archive_mmap) {
                for arch in fat.arches() {
                    hash_regular_archive(&mut m, arch.data(&*archive_mmap)?)?;
                }
            } else if let Ok(fat) = MachOFatFile64::parse(&*archive_mmap) {
                for arch in fat.arches() {
                    hash_regular_archive(&mut m, arch.data(&*archive_mmap)?)?;
                }
            } else {
                hash_regular_archive(&mut m, &archive_mmap)?;
            }
            Ok(m.finish())
        })
    });
    // try_join_all only fails on task *join* errors; each task's own Result
    // is inspected separately below.
    let mut hashes = futures::future::try_join_all(iter).await?;
    if let Some(i) = hashes.iter().position(|res| res.is_err()) {
        return Err(hashes.swap_remove(i).unwrap_err());
    }
    trace!(
        "Hashed {} files in {}",
        count,
        fmt_duration_as_secs(&start.elapsed())
    );
    Ok(hashes.into_iter().map(|res| res.unwrap()).collect())
}
/// Folds an `ar` archive's member names and contents into `m`, so the digest
/// covers logical contents rather than raw archive bytes.
fn hash_regular_archive(m: &mut Digest, data: &[u8]) -> Result<()> {
    let archive = ArchiveFile::parse(data)?;
    for member in archive.members() {
        let member = member?;
        m.update(member.name());
        m.update(member.data(data)?);
    }
    Ok(())
}
/// Formats a duration as seconds with millisecond precision, e.g. `"1.234 s"`.
pub fn fmt_duration_as_secs(duration: &Duration) -> String {
    let secs = duration.as_secs();
    let millis = duration.subsec_millis();
    format!("{secs}.{millis:03} s")
}
/// Waits for `child` to exit while concurrently writing `input` to its stdin
/// (when both exist) and draining its stdout/stderr.
///
/// All three futures run concurrently: draining while waiting is required,
/// since a child that fills an un-drained pipe buffer would deadlock against
/// a sequential reader.
async fn wait_with_input_output<T>(mut child: T, input: Option<Vec<u8>>) -> Result<process::Output>
where
    T: CommandChild + 'static,
{
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    // Future writing the input to stdin; None if there is no input or no pipe.
    let stdin = input.and_then(|i| {
        child.take_stdin().map(|mut stdin| async move {
            stdin.write_all(&i).await.context("failed to write stdin")
        })
    });
    let stdout = child.take_stdout();
    let stdout = async move {
        match stdout {
            Some(mut stdout) => {
                let mut buf = Vec::new();
                stdout
                    .read_to_end(&mut buf)
                    .await
                    .context("failed to read stdout")?;
                Result::Ok(Some(buf))
            }
            None => Ok(None),
        }
    };
    let stderr = child.take_stderr();
    let stderr = async move {
        match stderr {
            Some(mut stderr) => {
                let mut buf = Vec::new();
                stderr
                    .read_to_end(&mut buf)
                    .await
                    .context("failed to read stderr")?;
                Result::Ok(Some(buf))
            }
            None => Ok(None),
        }
    };
    let status = async move {
        if let Some(stdin) = stdin {
            // Errors are deliberately ignored: the child may exit without
            // consuming all of its stdin.
            let _ = stdin.await;
        }
        child.wait().await.context("failed to wait for child")
    };
    let (status, stdout, stderr) = futures::future::try_join3(status, stdout, stderr).await?;
    Ok(process::Output {
        status,
        stdout: stdout.unwrap_or_default(),
        stderr: stderr.unwrap_or_default(),
    })
}
/// Spawns `command` with piped stdout/stderr, feeds it `input` (if any) and
/// returns the captured output. A non-success exit status is converted into a
/// `ProcessError` carrying the full output.
pub async fn run_input_output<C>(mut command: C, input: Option<Vec<u8>>) -> Result<process::Output>
where
    C: RunCommand,
{
    // Only pipe stdin when there is something to feed the child.
    let stdin_cfg = if input.is_some() {
        Stdio::piped()
    } else {
        Stdio::inherit()
    };
    let child = command
        .stdin(stdin_cfg)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .await?;
    let output = wait_with_input_output(child, input).await?;
    if output.status.success() {
        Ok(output)
    } else {
        Err(ProcessError(output).into())
    }
}
/// Serializes `data` with bincode and writes it to `writer` prefixed with its
/// length as a big-endian `u32`, then flushes.
///
/// Fix: the length was previously narrowed with `as u32`, which silently
/// truncates payloads larger than `u32::MAX` bytes and corrupts the framing;
/// such payloads now produce an error instead.
pub fn write_length_prefixed_bincode<W, S>(mut writer: W, data: S) -> Result<()>
where
    W: Write,
    S: Serialize,
{
    let bytes = bincode::serialize(&data)?;
    // Checked conversion: refuse payloads whose length does not fit the prefix.
    let len_u32 =
        u32::try_from(bytes.len()).context("length-prefixed payload exceeds u32::MAX bytes")?;
    let mut len = [0; 4];
    BigEndian::write_u32(&mut len, len_u32);
    writer.write_all(&len)?;
    writer.write_all(&bytes)?;
    writer.flush()?;
    Ok(())
}
/// `OsStr` prefix helpers with platform-specific implementations
/// (bytes on Unix, UTF-16 code units on Windows).
pub trait OsStrExt {
    /// True if `self` begins with the UTF-8 string `s`.
    fn starts_with(&self, s: &str) -> bool;
    /// If `self` begins with `s`, returns the remainder after the prefix.
    fn split_prefix(&self, s: &str) -> Option<OsString>;
}
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt as _OsStrExt;
#[cfg(unix)]
impl OsStrExt for OsStr {
    /// Byte-wise prefix test against a UTF-8 string.
    fn starts_with(&self, s: &str) -> bool {
        self.as_bytes().starts_with(s.as_bytes())
    }
    /// Returns everything after `s` when `self` starts with it, else `None`.
    fn split_prefix(&self, s: &str) -> Option<OsString> {
        self.as_bytes()
            .strip_prefix(s.as_bytes())
            .map(|rest| OsStr::from_bytes(rest).to_owned())
    }
}
#[cfg(windows)]
use std::os::windows::ffi::{OsStrExt as _OsStrExt, OsStringExt};
#[cfg(windows)]
impl OsStrExt for OsStr {
    /// Compares UTF-16 code units of `self` against the chars of `s`.
    ///
    /// Only code points below the surrogate range are compared directly; any
    /// pattern char at or above that range fails the match.
    /// NOTE(review): the guard is `to_match < 0xd7ff` — the BMP boundary is
    /// 0xD7FF inclusive, so `<=` was likely intended; confirm before changing.
    fn starts_with(&self, s: &str) -> bool {
        let u16s = self.encode_wide();
        let mut utf8 = s.chars();
        for codepoint in u16s {
            let to_match = match utf8.next() {
                Some(ch) => ch,
                // Pattern exhausted: everything matched so far.
                None => return true,
            };
            let to_match = to_match as u32;
            let codepoint = codepoint as u32;
            if to_match < 0xd7ff {
                if to_match != codepoint {
                    return false;
                }
            } else {
                return false;
            }
        }
        // Haystack exhausted: only a match if the pattern is also done.
        utf8.next().is_none()
    }
    /// Same comparison as `starts_with`, additionally returning the unmatched
    /// tail as an `OsString` when the prefix matches.
    fn split_prefix(&self, s: &str) -> Option<OsString> {
        let mut u16s = self.encode_wide().peekable();
        let mut utf8 = s.chars();
        while let Some(&codepoint) = u16s.peek() {
            let to_match = match utf8.next() {
                Some(ch) => ch,
                None => {
                    // Pattern exhausted: the remaining code units are the tail.
                    let codepoints = u16s.collect::<Vec<_>>();
                    return Some(OsString::from_wide(&codepoints));
                }
            };
            let to_match = to_match as u32;
            let codepoint = codepoint as u32;
            if to_match < 0xd7ff {
                if to_match != codepoint {
                    return None;
                }
            } else {
                return None;
            }
            u16s.next();
        }
        // Both exhausted together: the prefix equals the whole string.
        if utf8.next().is_none() {
            Some(OsString::new())
        } else {
            None
        }
    }
}
/// Writes `path`'s raw bytes to `dst` (Unix paths are arbitrary bytes).
#[cfg(unix)]
pub fn encode_path(dst: &mut dyn Write, path: &Path) -> std::io::Result<()> {
    use std::os::unix::prelude::*;
    dst.write_all(path.as_os_str().as_bytes())
}
/// Writes `path` to `dst` converted from UTF-16 to the OEM codepage,
/// mirroring the Windows `decode_path` below.
#[cfg(windows)]
pub fn encode_path(dst: &mut dyn Write, path: &Path) -> std::io::Result<()> {
    use std::os::windows::prelude::*;
    let points = path.as_os_str().encode_wide().collect::<Vec<_>>();
    let bytes = wide_char_to_multi_byte(&points)?;
    dst.write_all(&bytes)
}
/// Reconstructs a `PathBuf` from the raw bytes written by `encode_path`.
#[cfg(unix)]
pub fn decode_path(bytes: &[u8]) -> std::io::Result<PathBuf> {
    use std::os::unix::prelude::*;
    let os = OsStr::from_bytes(bytes);
    Ok(PathBuf::from(os))
}
/// Reconstructs a `PathBuf` from OEM-codepage bytes written by `encode_path`,
/// rejecting byte sequences that are invalid in that codepage.
#[cfg(windows)]
pub fn decode_path(bytes: &[u8]) -> std::io::Result<PathBuf> {
    use windows_sys::Win32::Globalization::{CP_OEMCP, MB_ERR_INVALID_CHARS};
    let codepage = CP_OEMCP;
    let flags = MB_ERR_INVALID_CHARS;
    Ok(OsString::from_wide(&multi_byte_to_wide_char(codepage, flags, bytes)?).into())
}
/// Converts a UTF-16 string to OEM-codepage bytes using the standard two-call
/// `WideCharToMultiByte` pattern (size query, then conversion).
#[cfg(windows)]
pub fn wide_char_to_multi_byte(wide_char_str: &[u16]) -> std::io::Result<Vec<u8>> {
    use windows_sys::Win32::Globalization::{CP_OEMCP, WideCharToMultiByte};
    let codepage = CP_OEMCP;
    let flags = 0;
    // WideCharToMultiByte treats a zero-length input as an error; short-circuit.
    if wide_char_str.is_empty() {
        return Ok(Vec::new());
    }
    unsafe {
        // First call: query the required buffer size (null output buffer).
        let len = WideCharToMultiByte(
            codepage,
            flags,
            wide_char_str.as_ptr(),
            wide_char_str.len() as i32,
            std::ptr::null_mut(),
            0,
            std::ptr::null(),
            std::ptr::null_mut(),
        );
        if len > 0 {
            let mut astr: Vec<u8> = Vec::with_capacity(len as usize);
            // Second call: convert into the (uninitialized) buffer; the
            // subsequent set_len marks the written bytes as initialized.
            let len = WideCharToMultiByte(
                codepage,
                flags,
                wide_char_str.as_ptr(),
                wide_char_str.len() as i32,
                astr.as_mut_ptr() as _,
                len,
                std::ptr::null(),
                std::ptr::null_mut(),
            );
            if len > 0 {
                astr.set_len(len as usize);
                // NOTE(review): after set_len(len), astr.len() == len always
                // holds, so the else branch below is unreachable dead code.
                if (len as usize) == astr.len() {
                    return Ok(astr);
                } else {
                    return Ok(astr[0..(len as usize)].to_vec());
                }
            }
        }
        Err(std::io::Error::last_os_error())
    }
}
/// Converts multi-byte text in `codepage` to UTF-16 using the standard
/// two-call `MultiByteToWideChar` pattern (size query, then conversion).
#[cfg(windows)]
pub fn multi_byte_to_wide_char(
    codepage: u32,
    flags: u32,
    multi_byte_str: &[u8],
) -> std::io::Result<Vec<u16>> {
    use windows_sys::Win32::Globalization::MultiByteToWideChar;
    // Zero-length input is reported as an error by the API; short-circuit.
    if multi_byte_str.is_empty() {
        return Ok(vec![]);
    }
    unsafe {
        // First call: query the required buffer size (null output buffer).
        let len = MultiByteToWideChar(
            codepage,
            flags,
            multi_byte_str.as_ptr(),
            multi_byte_str.len() as i32,
            std::ptr::null_mut(),
            0,
        );
        if len > 0 {
            let mut wstr: Vec<u16> = Vec::with_capacity(len as usize);
            // Second call: convert into the buffer.
            let len = MultiByteToWideChar(
                codepage,
                flags,
                multi_byte_str.as_ptr(),
                multi_byte_str.len() as i32,
                wstr.as_mut_ptr(),
                len,
            );
            // set_len happens before the success check; harmless because a
            // failed call returns 0, making this set_len(0).
            wstr.set_len(len as usize);
            if len > 0 {
                return Ok(wstr);
            }
        }
        Err(std::io::Error::last_os_error())
    }
}
/// A serializable `(seconds, nanoseconds)` timestamp relative to the Unix
/// epoch; negative seconds represent pre-epoch times (see `From<SystemTime>`).
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Timestamp {
    seconds: i64,
    nanoseconds: u32,
}
/// Nanoseconds per second.
const NSEC_PER_SEC: u32 = 1_000_000_000;
impl From<std::time::SystemTime> for Timestamp {
    /// Converts a `SystemTime` to a `Timestamp`, normalizing pre-epoch times
    /// so that `nanoseconds` always counts forward within `seconds`.
    fn from(system_time: std::time::SystemTime) -> Self {
        let (seconds, nanoseconds) = match system_time.duration_since(std::time::UNIX_EPOCH) {
            // At or after the epoch: direct split into secs + subsec nanos.
            Ok(duration) => (duration.as_secs() as i64, duration.subsec_nanos()),
            // Before the epoch: `error.duration()` is how far back we are.
            Err(error) => {
                let negative = error.duration();
                let secs_back = negative.as_secs() as i64;
                let nanos_back = negative.subsec_nanos();
                if nanos_back == 0 {
                    (-secs_back, 0)
                } else {
                    // Borrow one second so nanoseconds stays non-negative.
                    (-1 - secs_back, NSEC_PER_SEC - nanos_back)
                }
            }
        };
        Self {
            seconds,
            nanoseconds,
        }
    }
}
impl PartialEq<SystemTime> for Timestamp {
    /// Compares by converting the `SystemTime` into a `Timestamp` first.
    fn eq(&self, other: &SystemTime) -> bool {
        *self == Self::from(*other)
    }
}
impl Timestamp {
    /// Builds a timestamp from raw seconds/nanoseconds since the Unix epoch;
    /// `seconds` may be negative for pre-epoch times.
    pub fn new(seconds: i64, nanoseconds: u32) -> Self {
        Self {
            seconds,
            nanoseconds,
        }
    }
}
/// Extension trait: the best available "changed" timestamp for file metadata.
pub trait MetadataCtimeExt {
    fn ctime_or_creation(&self) -> std::io::Result<Timestamp>;
}
impl MetadataCtimeExt for std::fs::Metadata {
    #[cfg(unix)]
    fn ctime_or_creation(&self) -> std::io::Result<Timestamp> {
        use std::os::unix::prelude::MetadataExt;
        Ok(Timestamp {
            seconds: self.ctime(),
            // ctime_nsec() is i64; fall back to 0 if it doesn't fit u32.
            nanoseconds: self.ctime_nsec().try_into().unwrap_or(0),
        })
    }
    #[cfg(windows)]
    fn ctime_or_creation(&self) -> std::io::Result<Timestamp> {
        // Windows has no ctime; use the file creation time instead.
        self.created().map(Into::into)
    }
}
/// Adapter allowing any `std::hash::Hash` value to feed its bytes into a
/// [`Digest`] via the `Hasher` interface.
pub struct HashToDigest<'a> {
    pub digest: &'a mut Digest,
}
impl Hasher for HashToDigest<'_> {
    fn write(&mut self, bytes: &[u8]) {
        self.digest.update(bytes);
    }
    /// Deliberately unimplemented: this hasher is write-only (the digest is
    /// finalized elsewhere), so calling `finish` is a logic error.
    fn finish(&self) -> u64 {
        panic!("not supposed to be called");
    }
}
/// Daemonizes the current process (unless `SCCACHE_NO_DAEMON=1`), discards
/// the inherited jobserver, and installs crash-signal handlers that log the
/// signal and then restore the previous disposition.
#[cfg(not(windows))]
pub fn daemonize() -> Result<()> {
    use crate::jobserver::discard_inherited_jobserver;
    use daemonix::Daemonize;
    use std::env;
    use std::mem;
    match env::var("SCCACHE_NO_DAEMON") {
        // Opt-out for debugging/foreground operation.
        Ok(ref val) if val == "1" => {}
        _ => {
            Daemonize::new().start().context("failed to daemonize")?;
        }
    }
    unsafe {
        discard_inherited_jobserver();
    }
    // Previous signal dispositions, saved so the handler can chain to them.
    static mut PREV_SIGSEGV: *mut libc::sigaction = std::ptr::null_mut();
    static mut PREV_SIGBUS: *mut libc::sigaction = std::ptr::null_mut();
    static mut PREV_SIGILL: *mut libc::sigaction = std::ptr::null_mut();
    unsafe {
        // Optionally lift the core-dump size limit for post-mortem debugging.
        match env::var("SCCACHE_ALLOW_CORE_DUMPS") {
            Ok(ref val) if val == "1" => {
                let rlim = libc::rlimit {
                    rlim_cur: libc::RLIM_INFINITY,
                    rlim_max: libc::RLIM_INFINITY,
                };
                libc::setrlimit(libc::RLIMIT_CORE, &rlim);
            }
            _ => {}
        }
        PREV_SIGSEGV = Box::into_raw(Box::new(mem::zeroed::<libc::sigaction>()));
        PREV_SIGBUS = Box::into_raw(Box::new(mem::zeroed::<libc::sigaction>()));
        PREV_SIGILL = Box::into_raw(Box::new(mem::zeroed::<libc::sigaction>()));
        let mut new: libc::sigaction = mem::zeroed();
        // Store the handler's address as the sigaction target.
        new.sa_sigaction = (handler as *const libc::c_void).expose_provenance();
        new.sa_flags = libc::SA_SIGINFO | libc::SA_RESTART;
        libc::sigaction(libc::SIGSEGV, &new, &mut *PREV_SIGSEGV);
        libc::sigaction(libc::SIGBUS, &new, &mut *PREV_SIGBUS);
        libc::sigaction(libc::SIGILL, &new, &mut *PREV_SIGILL);
    }
    return Ok(());
    // Crash handler: print the signal number, then restore the previous
    // disposition so the prior handler (or default) runs when the signal
    // re-triggers.
    extern "C" fn handler(
        signum: libc::c_int,
        _info: *mut libc::siginfo_t,
        _ptr: *mut libc::c_void,
    ) {
        use std::fmt::{Result, Write};
        // Minimal writer over raw write(2): no allocation or locking inside
        // the signal handler.
        struct Stderr;
        impl Write for Stderr {
            fn write_str(&mut self, s: &str) -> Result {
                unsafe {
                    let bytes = s.as_bytes();
                    libc::write(libc::STDERR_FILENO, bytes.as_ptr() as *const _, bytes.len());
                    Ok(())
                }
            }
        }
        unsafe {
            let _ = writeln!(Stderr, "signal {} received", signum);
            match signum {
                libc::SIGBUS => libc::sigaction(signum, &*PREV_SIGBUS, std::ptr::null_mut()),
                libc::SIGILL => libc::sigaction(signum, &*PREV_SIGILL, std::ptr::null_mut()),
                _ => libc::sigaction(signum, &*PREV_SIGSEGV, std::ptr::null_mut()),
            };
        }
    }
}
/// No-op on Windows: the server is not daemonized there.
#[cfg(windows)]
pub fn daemonize() -> Result<()> {
    Ok(())
}
/// Builds the blocking HTTP client used by the dist features.
///
/// `pool_max_idle_per_host(0)` disables keeping idle connections —
/// presumably to avoid reusing stale pooled connections; confirm intent
/// before changing.
#[cfg(any(feature = "dist-server", feature = "dist-client"))]
pub fn new_reqwest_blocking_client() -> reqwest::blocking::Client {
    reqwest::blocking::Client::builder()
        .pool_max_idle_per_host(0)
        .build()
        .expect("http client must build with success")
}
/// Decodes one ASCII hex digit (either case) to its value 0..=15.
fn unhex(b: u8) -> std::io::Result<u8> {
    let value = match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - b'a' + 10,
        b'A'..=b'F' => b - b'A' + 10,
        _ => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "invalid hex digit",
            ))
        }
    };
    Ok(value)
}
/// Reverses `escape_ascii`-style escaping: `\n`, `\r`, `\t`, `\'`, `\"`,
/// `\\` and `\xHH` sequences are decoded; all other bytes pass through.
/// Returns `InvalidInput` for truncated or unknown escapes.
pub fn ascii_unescape_default(s: &[u8]) -> std::io::Result<Vec<u8>> {
    // Small helper for the three error cases.
    fn bad(msg: &'static str) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidInput, msg)
    }
    let mut out = Vec::with_capacity(s.len() + 4);
    let mut i = 0;
    while i < s.len() {
        let b = s[i];
        if b != b'\\' {
            // Ordinary byte: copy verbatim.
            out.push(b);
            i += 1;
            continue;
        }
        // Escape introducer: the next byte selects the escape.
        i += 1;
        if i >= s.len() {
            return Err(bad("incomplete escape"));
        }
        match s[i] {
            b'n' => out.push(b'\n'),
            b'r' => out.push(b'\r'),
            b't' => out.push(b'\t'),
            b'\'' => out.push(b'\''),
            b'"' => out.push(b'"'),
            b'\\' => out.push(b'\\'),
            b'x' => {
                // Two hex digits follow.
                i += 1;
                if i + 1 >= s.len() {
                    return Err(bad("incomplete hex escape"));
                }
                out.push((unhex(s[i])? << 4) | unhex(s[i + 1])?);
                i += 1;
            }
            _ => return Err(bad("invalid escape")),
        }
        i += 1;
    }
    Ok(out)
}
/// Number of logical CPUs available, falling back to 1 when the query fails.
pub fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(n) => n.get(),
        Err(_) => 1,
    }
}
/// Removes occurrences of any `basedirs` prefix from preprocessor output so
/// cache keys do not depend on the absolute build directory.
///
/// A match only counts when it starts at a plausible path boundary (start of
/// output, or after whitespace, `"`, or `<`). Overlapping matches are
/// resolved in favor of the earliest position, then the longest basedir.
pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec<u8>]) -> Cow<'a, [u8]> {
    if basedirs.is_empty() || preprocessor_output.is_empty() {
        return Cow::Borrowed(preprocessor_output);
    }
    trace!(
        "Stripping basedirs from preprocessor output with length {}",
        preprocessor_output.len(),
    );
    // (position, length, basedir index) for every boundary-anchored match.
    let mut matches: Vec<(usize, usize, usize)> = Vec::new();
    #[cfg(not(target_os = "windows"))]
    let normalized_output = preprocessor_output;
    // Windows: search a lowercased, forward-slash copy so matching is
    // case/separator-insensitive.
    // NOTE(review): positions found in `normalized_output` are applied to
    // `preprocessor_output` below, which assumes normalization preserves byte
    // offsets; true for ASCII but not for some non-ASCII lowercasings
    // (e.g. 'İ' expands) — TODO confirm this is acceptable.
    #[cfg(target_os = "windows")]
    let normalized_output = &normalize_win_path(preprocessor_output);
    for (idx, basedir_bytes) in basedirs.iter().enumerate() {
        let basedir = basedir_bytes.as_slice();
        let finder = memchr::memmem::find_iter(normalized_output, &basedir);
        for pos in finder {
            // Only strip matches that begin at a path boundary.
            let is_boundary = pos == 0
                || normalized_output[pos - 1].is_ascii_whitespace()
                || normalized_output[pos - 1] == b'"'
                || normalized_output[pos - 1] == b'<';
            if is_boundary {
                matches.push((pos, basedir.len(), idx));
            }
        }
    }
    if matches.is_empty() {
        return Cow::Borrowed(preprocessor_output);
    }
    // Earliest position first; for equal positions prefer the longer basedir.
    matches.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));
    // Drop matches that overlap an already-accepted one.
    let mut filtered_matches: Vec<(usize, usize)> = Vec::with_capacity(matches.len());
    let mut last_end = 0;
    for (pos, len, idx) in matches {
        if pos >= last_end {
            filtered_matches.push((pos, len));
            last_end = pos + len;
            trace!(
                "Matched basedir {} at position {} with length {}",
                String::from_utf8_lossy(&basedirs[idx]),
                pos,
                len
            );
        }
    }
    // Rebuild the output with the matched spans removed.
    let mut result = Vec::with_capacity(preprocessor_output.len());
    let mut current_pos = 0;
    for (match_pos, match_len) in filtered_matches {
        result.extend_from_slice(&preprocessor_output[current_pos..match_pos]);
        current_pos = match_pos + match_len;
    }
    result.extend_from_slice(&preprocessor_output[current_pos..]);
    Cow::Owned(result)
}
/// Normalizes a Windows-style path for comparison: ASCII letters are
/// lowercased, backslashes become forward slashes, and well-formed multi-byte
/// UTF-8 sequences are lowercased via `str::to_lowercase`. Bytes that are not
/// valid UTF-8 (or truncated sequences) pass through unchanged.
pub fn normalize_win_path(path: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(path.len());
    let mut pos = 0;
    while pos < path.len() {
        let lead = path[pos];
        // Fast path: single-byte (ASCII) characters.
        if lead.is_ascii() {
            out.push(match lead {
                b'A'..=b'Z' => lead | 0x20, // lowercase
                b'\\' => b'/',
                other => other,
            });
            pos += 1;
            continue;
        }
        // Determine the UTF-8 sequence width from the lead byte; 0 marks an
        // invalid lead (continuation byte or out-of-range).
        let width = match lead {
            0b1100_0000..=0b1101_1111 => 2,
            0b1110_0000..=0b1110_1111 => 3,
            0b1111_0000..=0b1111_0111 => 4,
            _ => 0,
        };
        let end = pos + width;
        if width == 0 || end > path.len() {
            // Invalid or truncated sequence: emit the byte verbatim.
            out.push(lead);
            pos += 1;
            continue;
        }
        match std::str::from_utf8(&path[pos..end]) {
            Ok(chunk) => {
                // Note: lowercasing may change the byte length of the chunk.
                out.extend_from_slice(chunk.to_lowercase().as_bytes());
                pos = end;
            }
            Err(_) => {
                out.push(lead);
                pos += 1;
            }
        }
    }
    out
}
/// Unit tests for the helpers in this module.
#[cfg(test)]
mod tests {
    use super::{OsStrExt, TimeMacroFinder};
    use std::ffi::{OsStr, OsString};

    // OsStrExt::starts_with on ordinary, full-length, and empty inputs.
    #[test]
    fn simple_starts_with() {
        let a: &OsStr = "foo".as_ref();
        assert!(a.starts_with(""));
        assert!(a.starts_with("f"));
        assert!(a.starts_with("fo"));
        assert!(a.starts_with("foo"));
        assert!(!a.starts_with("foo2"));
        assert!(!a.starts_with("b"));
        assert!(!a.starts_with("b"));
        let a: &OsStr = "".as_ref();
        assert!(!a.starts_with("a"))
    }

    // OsStrExt::split_prefix returns the suffix only for true prefixes.
    #[test]
    fn simple_strip_prefix() {
        let a: &OsStr = "foo".as_ref();
        assert_eq!(a.split_prefix(""), Some(OsString::from("foo")));
        assert_eq!(a.split_prefix("f"), Some(OsString::from("oo")));
        assert_eq!(a.split_prefix("fo"), Some(OsString::from("o")));
        assert_eq!(a.split_prefix("foo"), Some(OsString::from("")));
        assert_eq!(a.split_prefix("foo2"), None);
        assert_eq!(a.split_prefix("b"), None);
    }

    // Time macros must be detected even when split across chunked reads of
    // various sizes (all-small, small-after-large, multi-way splits).
    #[test]
    fn test_time_macro_short_read() {
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"__TIME__");
        assert!(finder.found_time());
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"__");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TIME__");
        assert!(finder.found_time());
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"Something or other larger than the haystack");
        finder.find_time_macros(b"__");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TIME__");
        assert!(finder.found_time());
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"Something or other larger than the haystack");
        finder.find_time_macros(b"__");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TIME__ something or other larger than the haystack");
        assert!(finder.found_time());
        // Token split over four consecutive tiny reads.
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"__");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TI");
        assert!(!finder.found_time());
        finder.find_time_macros(b"ME");
        assert!(!finder.found_time());
        finder.find_time_macros(b"__");
        assert!(finder.found_time());
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"This is larger than the haystack __");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TI");
        assert!(!finder.found_time());
        finder.find_time_macros(b"ME");
        assert!(!finder.found_time());
        finder.find_time_macros(b"__");
        assert!(finder.found_time());
        // Mixed sequence: __TIME__ then __TIMESTAMP__, each spanning chunks.
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"This is larger than the haystack __");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TI");
        assert!(!finder.found_time());
        finder.find_time_macros(b"ME__ This is larger than the haystack");
        assert!(finder.found_time());
        assert!(!finder.found_timestamp());
        finder.find_time_macros(b"__");
        assert!(!finder.found_timestamp());
        finder.find_time_macros(b"TIMESTAMP__ This is larger than the haystack");
        assert!(finder.found_timestamp());
        let mut finder = TimeMacroFinder::new();
        finder.find_time_macros(b"__");
        assert!(!finder.found_time());
        finder.find_time_macros(b"TIME__ This is larger than the haystack");
        assert!(finder.found_time());
        assert!(!finder.found_timestamp());
        finder.find_time_macros(b"__");
        assert!(!finder.found_timestamp());
        finder.find_time_macros(b"TIMESTAMP__ This is larger than the haystack");
        assert!(finder.found_timestamp());
    }

    // Round-trips every 1..=3-byte combination of an escape-heavy alphabet
    // through `escape_ascii` and `ascii_unescape_default`.
    #[test]
    fn test_ascii_unescape_default() {
        let mut alphabet = r#"\\'"\t\n\r"#.as_bytes().to_vec();
        alphabet.push(b'a');
        alphabet.push(b'1');
        alphabet.push(0);
        alphabet.push(0xff);
        let mut input = vec![];
        let mut output = vec![];
        // Odometer over three alphabet positions; index == len means "absent".
        let mut alphabet_indexes = [0; 3];
        let mut tested_cases = 0;
        loop {
            input.clear();
            output.clear();
            for idx in alphabet_indexes {
                if idx < alphabet.len() {
                    input.push(alphabet[idx]);
                }
            }
            if input.is_empty() {
                break;
            }
            output.extend(input.as_slice().escape_ascii());
            let result = super::ascii_unescape_default(&output).unwrap();
            assert_eq!(input, result, "{:?}", output);
            tested_cases += 1;
            // Advance the odometer.
            for idx in &mut alphabet_indexes {
                *idx += 1;
                if *idx > alphabet.len() {
                    *idx = 0;
                } else {
                    break;
                }
            }
        }
        assert_eq!(tested_cases, (alphabet.len() + 1).pow(3) - 1);
        let empty_result = super::ascii_unescape_default(&[]).unwrap();
        assert!(empty_result.is_empty(), "{:?}", empty_result);
    }

    // Basic stripping: matched prefixes removed, unmatched output untouched.
    #[test]
    fn test_strip_basedir_simple() {
        let basedir = b"/home/user/project/".to_vec();
        let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src/main.c\"\nint main() { return 0; }";
        assert_eq!(&*output, expected);
        let input =
            b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src/main.c\"\n# 2 \"include/header.h\"";
        assert_eq!(&*output, expected);
        let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        assert_eq!(&*output, input);
    }

    // Empty basedir list and empty input are both pass-through.
    #[test]
    fn test_strip_basedir_empty() {
        let input = b"# 1 \"/home/user/project/src/main.c\"";
        let output = super::strip_basedirs(input, &[]);
        assert_eq!(&*output, input);
        let basedir = b"/home/user/project/".to_vec();
        let input = b"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        assert_eq!(&*output, input);
    }

    // Matches embedded mid-token (no boundary before them) are not stripped.
    #[test]
    fn test_strip_basedir_not_at_boundary() {
        let basedir = b"/home/user/".to_vec();
        let input = b"text/home/user/file.c and \"/home/user/other.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"text/home/user/file.c and \"other.c\"";
        assert_eq!(&*output, expected);
    }

    // The trailing slash is part of the basedir: without it, the separator
    // survives in the output.
    #[test]
    fn test_strip_basedir_trailing_slashes() {
        let basedir = b"/home/user/project".to_vec();
        let input = b"# 1 \"/home/user/project/src/main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"/src/main.c\"";
        assert_eq!(&*output, expected);
        let basedir = b"/home/user/project/".to_vec();
        let input = b"# 1 \"/home/user/project/src/main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src/main.c\"";
        assert_eq!(&*output, expected);
    }

    // Multiple basedirs, including one that is a prefix of another (longest
    // match wins at the same position).
    #[test]
    fn test_strip_basedirs_multiple() {
        let basedirs = vec![
            b"/home/user1/project/".to_vec(),
            b"/home/user2/workspace/".to_vec(),
        ];
        let input =
            b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\"";
        let output = super::strip_basedirs(input, &basedirs);
        let expected = b"# 1 \"src/main.c\"\n# 2 \"lib/util.c\"";
        assert_eq!(&*output, expected);
        let basedirs = vec![b"/home/user/".to_vec(), b"/home/user/project/".to_vec()];
        let input = b"# 1 \"/home/user/project/src/main.c\"";
        let output = super::strip_basedirs(input, &basedirs);
        let expected = b"# 1 \"src/main.c\"";
        assert_eq!(&*output, expected);
    }

    // Windows: matching is case-insensitive and separator-insensitive against
    // a normalized (lowercase, forward-slash) basedir.
    #[cfg(target_os = "windows")]
    #[test]
    fn test_strip_basedir_windows_backslashes() {
        let basedir = b"c:/users/test/project".to_vec();
        let input = b"# 1 \"C:\\Users\\test\\project\\Src\\Main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"\\Src\\Main.c\"";
        assert_eq!(&*output, expected);
        let basedir = b"c:/users/test/project/".to_vec();
        let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src\\main.c\"";
        assert_eq!(&*output, expected);
    }

    // Windows: mixed forward/backward slashes in the same path still match.
    #[cfg(target_os = "windows")]
    #[test]
    fn test_strip_basedir_windows_mixed_slashes() {
        let basedir = b"c:/users/test/project/".to_vec();
        let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src/main.c\"";
        assert_eq!(&*output, expected, "Failed to strip mixed slash path");
        let input = b"# 1 \"C:\\Users/test/project/src\\main.c\"";
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        let expected = b"# 1 \"src\\main.c\"";
        assert_eq!(
            &*output, expected,
            "Failed to strip reverse mixed slash path"
        );
    }

    // normalize_win_path: ASCII lowercasing and slash conversion.
    #[test]
    fn test_normalize_win_path_ascii() {
        let input = b"C:\\Users\\Test\\Project";
        let normalized = super::normalize_win_path(input);
        assert_eq!(normalized, b"c:/users/test/project");
        let input = b"C:\\USERS\\test\\PROJECT";
        let normalized = super::normalize_win_path(input);
        assert_eq!(normalized, b"c:/users/test/project");
    }

    // normalize_win_path: multi-byte UTF-8 lowercasing, including an expansion
    // case ('İ' lowercases to "i" + combining dot).
    #[test]
    fn test_normalize_win_path_utf8() {
        let input = "C:\\Users\\Müller\\Projekt".as_bytes();
        let normalized = super::normalize_win_path(input);
        let expected = "c:/users/müller/projekt".as_bytes();
        assert_eq!(normalized, expected);
        let input = "C:\\Пользователь\\Проект".as_bytes();
        let normalized = super::normalize_win_path(input);
        let expected = "c:/пользователь/проект".as_bytes();
        assert_eq!(normalized, expected);
        let input = "C:\\İstanbul\\DİREKTÖRY".as_bytes();
        let normalized = super::normalize_win_path(input);
        let expected = "c:/i\u{307}stanbul/di\u{307}rektöry".as_bytes();
        assert_eq!(normalized, expected);
    }

    // Mixed ASCII and multi-byte segments in one path.
    #[test]
    fn test_normalize_win_path_mixed_ascii_utf8() {
        let input = "C:\\Users\\Test\\Café\\Проект".as_bytes();
        let normalized = super::normalize_win_path(input);
        let expected = "c:/users/test/café/проект".as_bytes();
        assert_eq!(normalized, expected);
    }

    // Invalid lead bytes pass through unchanged; surrounding ASCII is still
    // normalized.
    #[test]
    fn test_normalize_win_path_invalid_utf8() {
        let mut input = b"C:\\Users\\".to_vec();
        input.push(0xFF);
        input.extend_from_slice(b"\\Test");
        let normalized = super::normalize_win_path(&input);
        let mut expected = b"c:/users/".to_vec();
        expected.push(0xFF);
        expected.extend_from_slice(b"/test");
        assert_eq!(normalized, expected);
    }

    // A truncated multi-byte sequence at end of input passes through.
    #[test]
    fn test_normalize_win_path_incomplete_utf8() {
        let mut input = b"C:\\Users\\Test".to_vec();
        input.push(0xC3);
        let normalized = super::normalize_win_path(&input);
        let mut expected = b"c:/users/test".to_vec();
        expected.push(0xC3);
        assert_eq!(normalized, expected);
    }

    // Empty input produces empty output.
    #[test]
    fn test_normalize_win_path_empty() {
        let input = b"";
        let normalized = super::normalize_win_path(input);
        assert_eq!(normalized, b"");
    }
}