use std::fs::File;
use std::io;
use std::path::Path;
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
use serde::{Deserialize, Serialize};
/// A contiguous byte range of a file that holds data rather than a hole.
///
/// Values of this type are produced by `detect_data_regions` and can be
/// round-tripped through serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[allow(dead_code)]
pub struct DataRegion {
    /// Byte offset of the start of the region, counted from the beginning of the file.
    pub offset: u64,
    /// Number of bytes covered by the region.
    pub length: u64,
}
/// Returns the data (non-hole) regions of the file at `path`, in ascending offset order.
///
/// The file is walked with `lseek`, alternating `SEEK_DATA` and `SEEK_HOLE`, so the result
/// reflects what the filesystem reports. An empty file yields an empty list. Every region
/// ends at or before the file size that was read when the scan started.
///
/// # Errors
///
/// * Errors from opening the file or reading its metadata are returned unchanged.
/// * If the initial `SEEK_DATA` probe fails with `ENXIO`, an error of kind
///   [`io::ErrorKind::Unsupported`] is returned.
/// * Any other `SEEK_DATA` failure (for example `EINVAL`) is returned as the raw OS error,
///   whether it happens on the initial probe or later in the scan.
#[cfg(unix)]
#[allow(dead_code)]
pub fn detect_data_regions(path: &Path) -> io::Result<Vec<DataRegion>> {
    // The numeric `whence` values are platform-specific: Linux, the BSDs and illumos use
    // SEEK_DATA = 3 / SEEK_HOLE = 4, while Apple platforms define them the other way round.
    const SEEK_DATA: i32 = if cfg!(target_vendor = "apple") { 4 } else { 3 };
    const SEEK_HOLE: i32 = if cfg!(target_vendor = "apple") { 3 } else { 4 };

    let file = File::open(path)?;
    let file_size = file.metadata()?.len();
    if file_size == 0 {
        return Ok(Vec::new());
    }

    // `file` stays alive until the function returns, so `fd` remains valid for every call below.
    let fd = file.as_raw_fd();
    let file_size_i64 = file_size as i64;

    // Probe once up front so that lack of SEEK_DATA support surfaces as an error instead of
    // an (incorrect) empty region list.
    // SAFETY: `fd` is an open descriptor owned by `file`; `lseek` only moves its file offset.
    let first_data = unsafe { libc::lseek(fd, 0, SEEK_DATA) };
    if first_data < 0 {
        let err = io::Error::last_os_error();
        // NOTE(review): lseek also reports ENXIO when no data exists at or after the offset
        // (i.e. the file is one big hole). It is mapped to `Unsupported` here, presumably so
        // callers fall back to treating the file as dense; confirm that this is intended.
        if err.raw_os_error() == Some(libc::ENXIO) {
            return Err(io::Error::new(
                io::ErrorKind::Unsupported,
                "SEEK_DATA not properly supported (got ENXIO)",
            ));
        }
        // Everything else, including EINVAL, is passed through as the raw OS error.
        return Err(err);
    }

    let mut regions = Vec::new();
    let mut pos: i64 = 0;
    while pos < file_size_i64 {
        // SAFETY: see the probe above; `fd` is still owned by the live `file`.
        let data_start = unsafe { libc::lseek(fd, pos, SEEK_DATA) };
        if data_start < 0 {
            let err = io::Error::last_os_error();
            if err.raw_os_error() == Some(libc::ENXIO) {
                // No data at or after `pos`: the rest of the file is a trailing hole.
                break;
            }
            // A real failure must not be mistaken for "no more data"; returning a silently
            // truncated list would make callers drop the remaining data.
            return Err(err);
        }
        if data_start >= file_size_i64 {
            break;
        }

        // SAFETY: see the probe above; `fd` is still owned by the live `file`.
        let hole_start = unsafe { libc::lseek(fd, data_start, SEEK_HOLE) };
        // If SEEK_HOLE fails or points past the size we started with, treat everything up to
        // that size as data; over-reporting data is the safe direction.
        let data_end = if hole_start < 0 || hole_start > file_size_i64 {
            file_size_i64
        } else {
            hole_start
        };

        regions.push(DataRegion {
            offset: data_start as u64,
            length: (data_end - data_start) as u64,
        });
        pos = data_end;
    }
    Ok(regions)
}
/// Non-Unix fallback: hole detection is not implemented on these targets.
///
/// The path is never inspected.
///
/// # Errors
///
/// Always returns an error of kind [`io::ErrorKind::Unsupported`].
#[cfg(not(unix))]
pub fn detect_data_regions(_path: &Path) -> io::Result<Vec<DataRegion>> {
    let unsupported = io::Error::new(
        io::ErrorKind::Unsupported,
        "Sparse file detection not supported on this platform",
    );
    Err(unsupported)
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// A small, fully written file must be reported as a single region covering all 13 bytes.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_all_data() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("all_data.txt");
    std::fs::write(&target, b"Hello, world!").unwrap();

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == std::io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].offset, 0);
    assert_eq!(found[0].length, 13);
}
/// A zero-length file must succeed and report no regions at all.
#[test]
#[cfg(unix)]
fn test_detect_data_regions_empty_file() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("empty.txt");
    File::create(&target).unwrap();

    let found = detect_data_regions(&target).unwrap();

    assert_eq!(found.len(), 0);
}
/// A 10 MiB file whose only written bytes are the first 4 KiB must report a data region
/// near the start.
///
/// The fixture is built with `File::set_len` rather than an external `dd` process, so the
/// test no longer returns early (and passes vacuously) when `dd` is missing or fails.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_sparse_file() {
    use std::io::Write;

    let temp = TempDir::new().unwrap();
    let file_path = temp.path().join("sparse.dat");

    // Same layout the old `dd bs=1024 count=0 seek=10240` call produced: a 10 MiB file with
    // nothing written, followed by 4 KiB of real data at offset 0.
    let mut file = File::create(&file_path).unwrap();
    file.set_len(10240 * 1024).unwrap();
    file.write_all(&[0x42u8; 4096]).unwrap();
    drop(file);

    let regions = detect_data_regions(&file_path);
    match regions {
        Ok(r) => {
            assert!(!r.is_empty(), "Should have at least one data region");
            assert!(r[0].offset < 8192, "First region should be near start");
        }
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == std::io::ErrorKind::Unsupported => {}
        Err(e) => panic!("Unexpected error: {}", e),
    }
}
/// Opening a path that does not exist must surface as `NotFound`.
#[test]
#[cfg(unix)]
fn test_detect_data_regions_nonexistent_file() {
    let dir = TempDir::new().unwrap();
    let missing = dir.path().join("nonexistent.txt");

    let outcome = detect_data_regions(&missing);

    assert!(outcome.is_err());
    assert_eq!(outcome.unwrap_err().kind(), io::ErrorKind::NotFound);
}
/// When nothing is written before the 1 MiB mark, the first reported region must not start
/// before that offset.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_leading_hole() {
    use std::io::{Seek, SeekFrom, Write};

    const HOLE_LEN: u64 = 1024 * 1024;

    let dir = TempDir::new().unwrap();
    let target = dir.path().join("leading_hole.dat");

    // Seek past the start before the first write so the leading 1 MiB is never written.
    let mut handle = File::create(&target).unwrap();
    handle.seek(SeekFrom::Start(HOLE_LEN)).unwrap();
    handle.write_all(b"Data after hole").unwrap();
    drop(handle);

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    assert!(!found.is_empty(), "Should have at least one data region");
    assert!(
        found[0].offset >= HOLE_LEN,
        "First region should start at/after 1MB, got offset: {}",
        found[0].offset
    );
}
/// Data at the start followed by a hole that runs to end-of-file.
///
/// The fixture is extended with `set_len`, so the file really ends in a hole. The previous
/// version wrote a byte at the 1 MiB mark, which made the file end in data and left the
/// "no more data after this offset" path of the scan untested.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_trailing_hole() {
    use std::io::Write;

    const FILE_LEN: u64 = 1024 * 1024;

    let temp = TempDir::new().unwrap();
    let file_path = temp.path().join("trailing_hole.dat");

    let mut file = File::create(&file_path).unwrap();
    file.write_all(b"Data at start").unwrap();
    // Growing the file without writing leaves the tail unwritten up to EOF.
    file.set_len(FILE_LEN).unwrap();
    drop(file);

    let regions = detect_data_regions(&file_path);
    match regions {
        Ok(r) => {
            assert!(!r.is_empty(), "Should have at least one data region");
            assert!(r[0].offset < 1024, "First region should be at start, got offset: {}", r[0].offset);
            // Regions are clamped to the file size, so none may extend past EOF.
            for region in &r {
                assert!(
                    region.offset + region.length <= FILE_LEN,
                    "Region {:?} extends past end of file",
                    region
                );
            }
        }
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {}
        Err(e) => panic!("Unexpected error: {}", e),
    }
}
/// Three 4 KiB chunks written 1 MiB apart should yield several regions in strictly
/// increasing offset order.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_multiple_data_regions() {
    use std::io::{Seek, SeekFrom, Write};

    let dir = TempDir::new().unwrap();
    let target = dir.path().join("multiple_regions.dat");

    // Chunks at offsets 0, 1 MiB and 2 MiB, each filled with a distinct byte.
    let mut handle = File::create(&target).unwrap();
    handle.write_all(&[0x41u8; 4096]).unwrap();
    handle.seek(SeekFrom::Start(1024 * 1024)).unwrap();
    handle.write_all(&[0x42u8; 4096]).unwrap();
    handle.seek(SeekFrom::Start(2 * 1024 * 1024)).unwrap();
    handle.write_all(&[0x43u8; 4096]).unwrap();
    drop(handle);

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    assert!(found.len() >= 2, "Should have multiple data regions, got {}", found.len());
    // Compare each region with its successor.
    for (i, pair) in found.windows(2).enumerate() {
        assert!(
            pair[0].offset < pair[1].offset,
            "Regions should be ordered: region {} offset {} >= region {} offset {}",
            i,
            pair[0].offset,
            i + 1,
            pair[1].offset
        );
    }
}
/// Data written at the 1 GiB mark: the first region may start at most 4 KiB before that
/// offset.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_very_large_offset() {
    use std::io::{Seek, SeekFrom, Write};

    let far_offset = 1024 * 1024 * 1024u64;

    let dir = TempDir::new().unwrap();
    let target = dir.path().join("large_offset.dat");

    let mut handle = File::create(&target).unwrap();
    handle.seek(SeekFrom::Start(far_offset)).unwrap();
    handle.write_all(b"Far away data").unwrap();
    drop(handle);

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    assert!(!found.is_empty(), "Should have at least one data region");
    // The 4096-byte slack allows the region to start slightly before the written bytes.
    assert!(
        found[0].offset >= far_offset - 4096,
        "First region should be at large offset, got: {}",
        found[0].offset
    );
}
/// Single bytes written at the 1 MiB and 2 MiB marks must still produce at least one region.
#[test]
#[cfg(unix)]
#[ignore]
fn test_detect_data_regions_single_byte() {
    use std::io::{Seek, SeekFrom, Write};

    let dir = TempDir::new().unwrap();
    let target = dir.path().join("single_byte.dat");

    let mut handle = File::create(&target).unwrap();
    handle.seek(SeekFrom::Start(1024 * 1024)).unwrap();
    handle.write_all(&[0x99]).unwrap();
    // A second lone byte at 2 MiB extends the file past the first one.
    handle.seek(SeekFrom::Start(2 * 1024 * 1024)).unwrap();
    handle.write_all(&[0]).unwrap();
    drop(handle);

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    assert!(!found.is_empty(), "Should have at least one data region");
}
/// Structural invariants of the result: regions are sorted, do not overlap, and are never
/// empty. An empty result passes trivially.
#[test]
#[cfg(unix)]
fn test_detect_data_regions_region_ordering() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("ordering.dat");
    std::fs::write(&target, b"Test data for ordering").unwrap();

    let found = match detect_data_regions(&target) {
        Ok(found) => found,
        // EINVAL / Unsupported are tolerated instead of failing the test.
        Err(e) if e.raw_os_error() == Some(libc::EINVAL) || e.kind() == io::ErrorKind::Unsupported => {
            return;
        }
        Err(e) => panic!("Unexpected error: {}", e),
    };

    // Each neighbouring pair must be strictly ordered and must not overlap.
    for pair in found.windows(2) {
        assert!(pair[0].offset < pair[1].offset, "Regions must be ordered by offset");
        assert!(pair[0].offset + pair[0].length <= pair[1].offset, "Regions must not overlap");
    }
    for region in &found {
        assert!(region.length > 0, "Regions must have non-zero length");
    }
}
/// Small dense files: whenever a first region is reported, it must start at offset 0.
///
/// Errors and empty results are accepted silently; only a reported first region is checked.
#[test]
#[cfg(unix)]
fn test_detect_data_regions_boundary_conditions() {
    let dir = TempDir::new().unwrap();

    // Case 1: a few bytes starting at offset zero.
    let at_zero = dir.path().join("at_zero.dat");
    std::fs::write(&at_zero, b"At zero").unwrap();
    let first = detect_data_regions(&at_zero)
        .ok()
        .and_then(|regions| regions.into_iter().next());
    if let Some(first) = first {
        assert_eq!(first.offset, 0, "First region should start at 0");
    }

    // Case 2: the smallest non-empty file, one byte long.
    let one_byte = dir.path().join("one_byte.dat");
    std::fs::write(&one_byte, b"X").unwrap();
    let first = detect_data_regions(&one_byte)
        .ok()
        .and_then(|regions| regions.into_iter().next());
    if let Some(first) = first {
        assert_eq!(first.offset, 0);
        assert!(first.length >= 1, "Should contain at least 1 byte");
    }
}
/// On non-Unix targets the detector must fail with `Unsupported`, even for an existing file.
#[test]
#[cfg(not(unix))]
fn test_detect_data_regions_unsupported_platform() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("test.dat");
    std::fs::write(&target, b"test").unwrap();

    let outcome = detect_data_regions(&target);

    assert!(outcome.is_err());
    assert_eq!(outcome.unwrap_err().kind(), io::ErrorKind::Unsupported);
}
/// A `DataRegion` must survive a JSON round trip with both fields intact.
#[test]
fn test_data_region_serialization() {
    let region = DataRegion { offset: 1024, length: 4096 };

    // Serialize by reference so `region` is still available for the comparison below.
    let json = serde_json::to_string(&region).unwrap();
    let deserialized: DataRegion = serde_json::from_str(&json).unwrap();

    assert_eq!(region, deserialized);
    assert_eq!(deserialized.offset, 1024);
    assert_eq!(deserialized.length, 4096);
}
}