Skip to main content

coreutils_rs/dd/
core.rs

1use std::fs::{File, OpenOptions};
2use std::io::{self, Read, Seek, SeekFrom, Write};
3use std::time::Instant;
4
/// Status output level for dd.
///
/// Selected by the `status=` operand in `parse_dd_args`. When no `status=`
/// operand is given, `DdConfig::default()` leaves this at
/// [`StatusLevel::Default`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusLevel {
    /// Print transfer stats at end (default).
    ///
    /// There is no `status=` keyword for this variant; it is only reachable
    /// by omitting the operand.
    #[default]
    Default,
    /// No informational messages to stderr (`status=none`).
    None,
    /// Print periodic transfer stats (like GNU dd `status=progress`).
    Progress,
    /// Like default but also suppress error messages (`status=noerror`).
    NoError,
    /// Print record counts but suppress transfer speed/bytes line
    /// (`status=noxfer`).
    NoXfer,
}
20
/// Conversion flags for dd (`conv=` option).
///
/// Every field starts out `false` and is switched on when the keyword of the
/// same name appears in the comma-separated `conv=` value.
///
/// `parse_dd_args` rejects two combinations: `lcase` together with `ucase`,
/// and `excl` together with `nocreat`.
///
/// The type is comparable (`PartialEq`/`Eq`) so that parsed configurations can
/// be checked against an expected set of conversions.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DdConv {
    /// Convert to lowercase (ASCII only). Mutually exclusive with `ucase`.
    pub lcase: bool,
    /// Convert to uppercase (ASCII only). Mutually exclusive with `lcase`.
    pub ucase: bool,
    /// Swap every pair of input bytes.
    pub swab: bool,
    /// Continue after read errors.
    pub noerror: bool,
    /// Do not truncate the output file.
    pub notrunc: bool,
    /// Pad every input block with NULs to ibs-size.
    pub sync: bool,
    /// Call fdatasync on output before finishing.
    pub fdatasync: bool,
    /// Call fsync on output before finishing.
    pub fsync: bool,
    /// Fail if the output file already exists. Mutually exclusive with `nocreat`.
    pub excl: bool,
    /// Do not create the output file. Mutually exclusive with `excl`.
    pub nocreat: bool,
    /// Convert fixed-length records to newline-terminated (unblock).
    pub unblock: bool,
    /// Convert newline-terminated records to fixed-length (block).
    pub block: bool,
}
49
/// Input/output flags for dd (`iflag=`/`oflag=` options).
///
/// Each field is set to `true` when the flag of the same name appears in the
/// comma-separated operand value (see `parse_flag`). The per-field notes give
/// the meaning GNU dd documents for that flag; whether and how a flag is
/// honoured is decided by the copy routines, not by this struct.
///
/// The type is comparable (`PartialEq`/`Eq`) so that parsed flag sets can be
/// checked against an expected value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DdFlags {
    /// `append`: write in append mode (GNU dd: output only).
    pub append: bool,
    /// `direct`: use direct I/O for data.
    pub direct: bool,
    /// `directory`: fail unless the file is a directory.
    pub directory: bool,
    /// `dsync`: use synchronized I/O for data.
    pub dsync: bool,
    /// `sync`: use synchronized I/O for data and metadata.
    pub sync: bool,
    /// `fullblock`: accumulate full blocks of input (GNU dd: input only).
    pub fullblock: bool,
    /// `nonblock`: use non-blocking I/O.
    pub nonblock: bool,
    /// `noatime`: do not update the file's access time.
    pub noatime: bool,
    /// `nocache`: request that cached data for the file be discarded.
    pub nocache: bool,
    /// `noctty`: do not make the file the controlling terminal.
    pub noctty: bool,
    /// `nofollow`: do not follow symbolic links.
    pub nofollow: bool,
    /// `count_bytes`: treat `count=` as a byte count (GNU dd: input only).
    pub count_bytes: bool,
    /// `skip_bytes`: treat `skip=` as a byte count (GNU dd: input only).
    pub skip_bytes: bool,
}
67
/// Configuration for a dd operation.
///
/// Normally produced by `parse_dd_args` from `key=value` operands; the
/// `Default` impl supplies the value of every operand that is not given.
#[derive(Debug, Clone)]
pub struct DdConfig {
    /// Input file path (None = stdin). Set by `if=`.
    pub input: Option<String>,
    /// Output file path (None = stdout). Set by `of=`.
    pub output: Option<String>,
    /// Input block size in bytes. Set by `ibs=`, or by `bs=` which overrides
    /// it; defaults to 512.
    pub ibs: usize,
    /// Output block size in bytes. Set by `obs=`, or by `bs=` which overrides
    /// it; defaults to 512.
    pub obs: usize,
    /// Conversion block size (for block/unblock). Set by `cbs=`; defaults to 0.
    pub cbs: usize,
    /// Copy only this many input blocks (None = unlimited). Set by `count=`.
    pub count: Option<u64>,
    /// Skip this many ibs-sized blocks at start of input. Set by `skip=`.
    pub skip: u64,
    /// Skip this many obs-sized blocks at start of output. Set by `seek=`.
    pub seek: u64,
    /// Conversion options (`conv=`).
    pub conv: DdConv,
    /// Status output level (`status=`).
    pub status: StatusLevel,
    /// Input flags (`iflag=`).
    pub iflag: DdFlags,
    /// Output flags (`oflag=`).
    pub oflag: DdFlags,
}
96
97impl Default for DdConfig {
98    fn default() -> Self {
99        DdConfig {
100            input: None,
101            output: None,
102            ibs: 512,
103            obs: 512,
104            cbs: 0,
105            count: None,
106            skip: 0,
107            seek: 0,
108            conv: DdConv::default(),
109            status: StatusLevel::default(),
110            iflag: DdFlags::default(),
111            oflag: DdFlags::default(),
112        }
113    }
114}
115
/// Statistics from a dd copy operation.
///
/// All counters start at zero (`Default`). The type is comparable
/// (`PartialEq`/`Eq`) so that the outcome of a copy can be checked against
/// an expected value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DdStats {
    /// Number of full input blocks read.
    pub records_in_full: u64,
    /// Number of partial input blocks read.
    pub records_in_partial: u64,
    /// Number of full output blocks written.
    pub records_out_full: u64,
    /// Number of partial output blocks written.
    pub records_out_partial: u64,
    /// Total bytes copied.
    pub bytes_copied: u64,
}
130
/// Parse a GNU dd SIZE string: one or more terms joined by `x`, each term a
/// decimal number with an optional unit suffix.
///
/// Suffixes (matching GNU dd):
///   - bare letter = binary multiple (powers of 1024): k/K, M, G, T, P, E
///   - letter + `B` = decimal multiple (powers of 1000): kB, KB, MB, GB, TB, PB, EB
///   - letter + `iB` = explicit binary multiple: KiB, MiB, GiB, TiB, PiB, EiB
///   - special units: c (1), w (2), b (512)
///
/// Terms are multiplied from the right, so `1x2x4` is evaluated as
/// `1 * (2 * 4)` = 8. Surrounding whitespace is ignored, as is whitespace
/// directly after an `x`.
///
/// # Errors
///
/// Returns a message when a term is empty, is not a number, carries an
/// unknown suffix, or when any product does not fit in a `u64`.
pub fn parse_size(s: &str) -> Result<u64, String> {
    // Pass 1: split on `x` and parse every term, left to right, so the
    // left-most malformed term is the one that gets reported.
    let mut rest = s.trim();
    let mut factors: Vec<u64> = Vec::new();
    loop {
        if rest.is_empty() {
            return Err("empty size string".to_string());
        }
        match rest.find('x') {
            Some(pos) => {
                factors.push(parse_size_single(&rest[..pos])?);
                rest = rest[pos + 1..].trim();
            }
            None => {
                factors.push(parse_size_single(rest)?);
                break;
            }
        }
    }

    // Pass 2: multiply starting from the right-most term, checking each step.
    let mut product = factors
        .pop()
        .expect("the parsing loop always pushes at least one factor");
    while let Some(left) = factors.pop() {
        let right = product;
        product = left
            .checked_mul(right)
            .ok_or_else(|| format!("size overflow: {} * {}", left, right))?;
    }
    Ok(product)
}

/// Parse a single SIZE term (no `x` operator): leading ASCII digits followed
/// by an optional unit suffix. Whitespace is not tolerated here.
fn parse_size_single(s: &str) -> Result<u64, String> {
    // Every accepted suffix with its multiplier. Lowercase is only valid for
    // `k`/`kB`; everything else is case-sensitive.
    const UNITS: &[(&str, u64)] = &[
        ("", 1),
        ("c", 1),
        ("w", 2),
        ("b", 512),
        ("k", 1 << 10),
        ("K", 1 << 10),
        ("kB", 1_000),
        ("KB", 1_000),
        ("KiB", 1 << 10),
        ("M", 1 << 20),
        ("MB", 1_000_000),
        ("MiB", 1 << 20),
        ("G", 1 << 30),
        ("GB", 1_000_000_000),
        ("GiB", 1 << 30),
        ("T", 1 << 40),
        ("TB", 1_000_000_000_000),
        ("TiB", 1 << 40),
        ("P", 1 << 50),
        ("PB", 1_000_000_000_000_000),
        ("PiB", 1 << 50),
        ("E", 1 << 60),
        ("EB", 1_000_000_000_000_000_000),
        ("EiB", 1 << 60),
    ];

    if s.is_empty() {
        return Err("empty size string".to_string());
    }

    // ASCII digits are single bytes, so a byte count is a valid split point.
    let digit_count = s.bytes().take_while(|b| b.is_ascii_digit()).count();
    if digit_count == 0 {
        return Err(format!("invalid number: '{}'", s));
    }
    let (digits, suffix) = s.split_at(digit_count);

    let num = digits
        .parse::<u64>()
        .map_err(|e| format!("invalid number '{}': {}", digits, e))?;

    let multiplier = UNITS
        .iter()
        .find(|&&(unit, _)| unit == suffix)
        .map(|&(_, factor)| factor)
        .ok_or_else(|| format!("invalid suffix: '{}'", suffix))?;

    num.checked_mul(multiplier)
        .ok_or_else(|| format!("size overflow: {} * {}", num, multiplier))
}
208
209/// Parse dd command-line arguments (key=value pairs).
210pub fn parse_dd_args(args: &[String]) -> Result<DdConfig, String> {
211    let mut config = DdConfig::default();
212    let mut bs_set = false;
213
214    for arg in args {
215        if let Some((key, value)) = arg.split_once('=') {
216            match key {
217                "if" => config.input = Some(value.to_string()),
218                "of" => config.output = Some(value.to_string()),
219                "bs" => {
220                    let size = parse_size(value)? as usize;
221                    config.ibs = size;
222                    config.obs = size;
223                    bs_set = true;
224                }
225                "ibs" => {
226                    if !bs_set {
227                        config.ibs = parse_size(value)? as usize;
228                    }
229                }
230                "obs" => {
231                    if !bs_set {
232                        config.obs = parse_size(value)? as usize;
233                    }
234                }
235                "cbs" => config.cbs = parse_size(value)? as usize,
236                "count" => config.count = Some(parse_size(value)?),
237                "skip" => config.skip = parse_size(value)?,
238                "seek" => config.seek = parse_size(value)?,
239                "conv" => {
240                    for flag in value.split(',') {
241                        match flag {
242                            "lcase" => config.conv.lcase = true,
243                            "ucase" => config.conv.ucase = true,
244                            "swab" => config.conv.swab = true,
245                            "noerror" => config.conv.noerror = true,
246                            "notrunc" => config.conv.notrunc = true,
247                            "sync" => config.conv.sync = true,
248                            "fdatasync" => config.conv.fdatasync = true,
249                            "fsync" => config.conv.fsync = true,
250                            "excl" => config.conv.excl = true,
251                            "nocreat" => config.conv.nocreat = true,
252                            "block" => config.conv.block = true,
253                            "unblock" => config.conv.unblock = true,
254                            "" => {}
255                            _ => return Err(format!("invalid conversion: '{}'", flag)),
256                        }
257                    }
258                }
259                "iflag" => {
260                    for flag in value.split(',') {
261                        parse_flag(flag, &mut config.iflag)?;
262                    }
263                }
264                "oflag" => {
265                    for flag in value.split(',') {
266                        parse_flag(flag, &mut config.oflag)?;
267                    }
268                }
269                "status" => {
270                    config.status = match value {
271                        "none" => StatusLevel::None,
272                        "noxfer" => StatusLevel::NoXfer,
273                        "noerror" => StatusLevel::NoError,
274                        "progress" => StatusLevel::Progress,
275                        _ => return Err(format!("invalid status level: '{}'", value)),
276                    };
277                }
278                _ => return Err(format!("unrecognized operand: '{}'", arg)),
279            }
280        } else {
281            return Err(format!("unrecognized operand: '{}'", arg));
282        }
283    }
284
285    // Validate conflicting options
286    if config.conv.lcase && config.conv.ucase {
287        return Err("conv=lcase and conv=ucase are mutually exclusive".to_string());
288    }
289    if config.conv.excl && config.conv.nocreat {
290        return Err("conv=excl and conv=nocreat are mutually exclusive".to_string());
291    }
292
293    Ok(config)
294}
295
296/// Parse a single iflag/oflag value into the DdFlags struct.
297fn parse_flag(flag: &str, flags: &mut DdFlags) -> Result<(), String> {
298    match flag {
299        "append" => flags.append = true,
300        "direct" => flags.direct = true,
301        "directory" => flags.directory = true,
302        "dsync" => flags.dsync = true,
303        "sync" => flags.sync = true,
304        "fullblock" => flags.fullblock = true,
305        "nonblock" => flags.nonblock = true,
306        "noatime" => flags.noatime = true,
307        "nocache" => flags.nocache = true,
308        "noctty" => flags.noctty = true,
309        "nofollow" => flags.nofollow = true,
310        "count_bytes" => flags.count_bytes = true,
311        "skip_bytes" => flags.skip_bytes = true,
312        "" => {}
313        _ => return Err(format!("invalid flag: '{}'", flag)),
314    }
315    Ok(())
316}
317
/// Fill `buf` from `reader`, issuing as many reads as it takes.
///
/// Stops early only at end of input. Reads that fail with
/// `ErrorKind::Interrupted` are retried; any other error is returned as-is
/// (bytes already placed in `buf` are then not reported).
///
/// Returns the number of bytes stored in `buf`; 0 means EOF (or an empty
/// `buf`, in which case the reader is not touched).
fn read_full_block(reader: &mut dyn Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0usize;
    loop {
        if filled >= buf.len() {
            return Ok(filled);
        }
        match reader.read(&mut buf[filled..]) {
            Ok(0) => return Ok(filled),
            Ok(got) => filled += got,
            Err(err) => {
                if err.kind() != io::ErrorKind::Interrupted {
                    return Err(err);
                }
                // Interrupted: simply try the same read again.
            }
        }
    }
}
332
333/// Apply conversion options to a data block in-place.
334pub fn apply_conversions(data: &mut [u8], conv: &DdConv) {
335    if conv.swab {
336        // Swap every pair of bytes using u64 word-at-a-time processing.
337        // Process 8 bytes at a time: rotate each u16 pair within the u64.
338        let (prefix, chunks, suffix) = unsafe { data.align_to_mut::<u64>() };
339        // Handle unaligned prefix bytes
340        let pairs_pre = prefix.len() / 2;
341        for i in 0..pairs_pre {
342            prefix.swap(i * 2, i * 2 + 1);
343        }
344        // Process aligned u64 chunks: swap adjacent bytes in each pair
345        // For each u64 AABBCCDD_EEFFGGHH, we want BBAADDCC_FFEEHHGG
346        // This is: ((x & 0xFF00FF00FF00FF00) >> 8) | ((x & 0x00FF00FF00FF00FF) << 8)
347        for w in chunks.iter_mut() {
348            let x = *w;
349            *w = ((x & 0xFF00FF00FF00FF00) >> 8) | ((x & 0x00FF00FF00FF00FF) << 8);
350        }
351        // Handle remaining suffix bytes
352        let pairs_suf = suffix.len() / 2;
353        for i in 0..pairs_suf {
354            suffix.swap(i * 2, i * 2 + 1);
355        }
356    }
357
358    if conv.lcase {
359        for b in data.iter_mut() {
360            b.make_ascii_lowercase();
361        }
362    } else if conv.ucase {
363        for b in data.iter_mut() {
364            b.make_ascii_uppercase();
365        }
366    }
367}
368
369/// Skip input blocks by reading and discarding them.
370fn skip_input(reader: &mut dyn Read, blocks: u64, block_size: usize) -> io::Result<()> {
371    let mut discard_buf = vec![0u8; block_size];
372    for _ in 0..blocks {
373        let n = read_full_block(reader, &mut discard_buf)?;
374        if n == 0 {
375            break;
376        }
377    }
378    Ok(())
379}
380
/// Skip input by reading and discarding exactly `bytes` bytes.
///
/// Data is drained through a fixed 8 KiB scratch buffer. Skipping stops
/// quietly if the input ends first. A read that fails with
/// `ErrorKind::Interrupted` is retried, matching `read_full_block`; any
/// other read error is returned.
fn skip_input_bytes(reader: &mut dyn Read, bytes: u64) -> io::Result<()> {
    let mut remaining = bytes;
    let mut discard_buf = [0u8; 8192];
    while remaining > 0 {
        // Never ask for more than is left to skip, so nothing beyond the
        // requested range is consumed.
        let chunk = std::cmp::min(remaining, discard_buf.len() as u64) as usize;
        match reader.read(&mut discard_buf[..chunk]) {
            Ok(0) => break, // EOF before the requested amount was skipped
            Ok(n) => remaining -= n as u64,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
395
/// Skip input blocks by seeking (for seekable file inputs).
///
/// Positions `file` at absolute byte offset `blocks * block_size`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` if the byte offset does not fit in a
/// `u64` (instead of panicking or wrapping to an unrelated offset), or the
/// underlying seek error.
fn skip_input_seek(file: &mut File, blocks: u64, block_size: usize) -> io::Result<()> {
    let offset = blocks
        .checked_mul(block_size as u64)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "skip offset overflow"))?;
    file.seek(SeekFrom::Start(offset))?;
    Ok(())
}
402
/// Advance a generic writer by `seek_blocks` blocks of `block_size` bytes.
///
/// A `Box<dyn Write>` offers no way to seek, so the gap is produced by
/// writing that many all-zero blocks. The first write error aborts the
/// operation and is returned.
fn seek_output(writer: &mut Box<dyn Write>, seek_blocks: u64, block_size: usize) -> io::Result<()> {
    let padding = vec![0u8; block_size];
    (0..seek_blocks).try_for_each(|_| writer.write_all(&padding))
}
413
/// Seek output on a file using actual file seeking.
///
/// Positions `file` at absolute byte offset `seek_blocks * block_size`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` if the byte offset does not fit in a
/// `u64` (instead of panicking or wrapping to an unrelated offset), or the
/// underlying seek error.
fn seek_output_file(file: &mut File, seek_blocks: u64, block_size: usize) -> io::Result<()> {
    let offset = seek_blocks
        .checked_mul(block_size as u64)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "seek offset overflow"))?;
    file.seek(SeekFrom::Start(offset))?;
    Ok(())
}
420
421/// Check if any data conversion options are enabled.
422#[cfg(target_os = "linux")]
423fn has_conversions(conv: &DdConv) -> bool {
424    conv.lcase || conv.ucase || conv.swab || conv.sync || conv.block || conv.unblock
425}
426
427/// Check if any iflag/oflag fields require the generic path.
428/// Note: noatime is excluded because the raw path already uses O_NOATIME.
429/// fullblock is excluded because the raw read loop already reads full blocks.
430#[cfg(target_os = "linux")]
431fn has_flags(flags: &DdFlags) -> bool {
432    flags.append
433        || flags.direct
434        || flags.directory
435        || flags.dsync
436        || flags.sync
437        || flags.nonblock
438        || flags.nocache
439        || flags.noctty
440        || flags.nofollow
441        || flags.count_bytes
442        || flags.skip_bytes
443}
444
445/// Raw-syscall fast path: when both input and output are file paths,
446/// ibs == obs, no conversions, and no iflag/oflag are set, bypass
447/// Box<dyn Read/Write> and use libc::read/write directly. Handles
448/// char devices (e.g. /dev/zero) that copy_file_range can't handle.
449#[cfg(target_os = "linux")]
450fn try_raw_dd(config: &DdConfig) -> Option<io::Result<DdStats>> {
451    if config.input.is_none() || config.output.is_none() {
452        return None;
453    }
454    if has_conversions(&config.conv) || config.ibs != config.obs {
455        return None;
456    }
457    // Bail out if any iflag/oflag is set — we don't apply open() flags here
458    if has_flags(&config.iflag) || has_flags(&config.oflag) {
459        return None;
460    }
461
462    let start_time = Instant::now();
463    let in_path = config.input.as_ref().unwrap();
464    let out_path = config.output.as_ref().unwrap();
465
466    // Build CStrings before opening any FDs to avoid leaks on interior NUL
467    let in_cstr = match std::ffi::CString::new(in_path.as_str()) {
468        Ok(c) => c,
469        Err(_) => {
470            return Some(Err(io::Error::new(
471                io::ErrorKind::InvalidInput,
472                format!("input path contains NUL byte: '{}'", in_path),
473            )));
474        }
475    };
476    let out_cstr = match std::ffi::CString::new(out_path.as_str()) {
477        Ok(c) => c,
478        Err(_) => {
479            return Some(Err(io::Error::new(
480                io::ErrorKind::InvalidInput,
481                format!("output path contains NUL byte: '{}'", out_path),
482            )));
483        }
484    };
485
486    // Open input (O_CLOEXEC prevents FD inheritance in child processes)
487    let in_fd = unsafe {
488        libc::open(
489            in_cstr.as_ptr(),
490            libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOATIME,
491        )
492    };
493    let in_fd = if in_fd < 0 {
494        let first_err = io::Error::last_os_error();
495        if first_err.raw_os_error() == Some(libc::EPERM) {
496            // Retry without O_NOATIME — only EPERM means "file not owned by us"
497            let fd = unsafe { libc::open(in_cstr.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
498            if fd < 0 {
499                return Some(Err(io::Error::last_os_error()));
500            }
501            fd
502        } else {
503            return Some(Err(first_err));
504        }
505    } else {
506        in_fd
507    };
508
509    // Open output (O_CLOEXEC prevents FD inheritance)
510    let mut oflags = libc::O_WRONLY | libc::O_CLOEXEC;
511    if config.conv.excl {
512        oflags |= libc::O_CREAT | libc::O_EXCL;
513    } else if config.conv.nocreat {
514        // don't create
515    } else {
516        oflags |= libc::O_CREAT;
517    }
518    if !config.conv.notrunc && !config.conv.excl {
519        oflags |= libc::O_TRUNC;
520    }
521
522    let out_fd = unsafe { libc::open(out_cstr.as_ptr(), oflags, 0o666 as libc::mode_t) };
523    if out_fd < 0 {
524        unsafe { libc::close(in_fd) };
525        return Some(Err(io::Error::last_os_error()));
526    }
527
528    // Hint kernel for sequential readahead on input
529    unsafe {
530        libc::posix_fadvise(in_fd, 0, 0, libc::POSIX_FADV_SEQUENTIAL);
531    }
532
533    // Handle skip (seek input) — use checked_mul to prevent overflow
534    if config.skip > 0 {
535        let offset = match (config.skip as u64).checked_mul(config.ibs as u64) {
536            Some(o) if o <= i64::MAX as u64 => o as i64,
537            _ => {
538                unsafe {
539                    libc::close(in_fd);
540                    libc::close(out_fd);
541                }
542                return Some(Err(io::Error::new(
543                    io::ErrorKind::InvalidInput,
544                    "skip offset overflow",
545                )));
546            }
547        };
548        if unsafe { libc::lseek(in_fd, offset, libc::SEEK_SET) } < 0 {
549            // lseek failed (e.g. char device) — read and discard full blocks
550            let mut discard = vec![0u8; config.ibs];
551            'skip: for _ in 0..config.skip {
552                let mut skipped = 0usize;
553                while skipped < config.ibs {
554                    let n = unsafe {
555                        libc::read(
556                            in_fd,
557                            discard[skipped..].as_mut_ptr() as *mut _,
558                            config.ibs - skipped,
559                        )
560                    };
561                    if n > 0 {
562                        skipped += n as usize;
563                    } else if n == 0 {
564                        break 'skip; // EOF
565                    } else {
566                        let err = io::Error::last_os_error();
567                        if err.kind() == io::ErrorKind::Interrupted {
568                            continue;
569                        }
570                        // Non-EINTR error during skip — log and abort skip phase
571                        eprintln!("dd: error skipping input: {}", err);
572                        break 'skip;
573                    }
574                }
575            }
576        }
577    }
578
579    // Handle seek (seek output) — use checked_mul to prevent overflow
580    if config.seek > 0 {
581        let offset = match (config.seek as u64).checked_mul(config.obs as u64) {
582            Some(o) if o <= i64::MAX as u64 => o as i64,
583            _ => {
584                unsafe {
585                    libc::close(in_fd);
586                    libc::close(out_fd);
587                }
588                return Some(Err(io::Error::new(
589                    io::ErrorKind::InvalidInput,
590                    "seek offset overflow",
591                )));
592            }
593        };
594        if unsafe { libc::lseek(out_fd, offset, libc::SEEK_SET) } < 0 {
595            let err = io::Error::last_os_error();
596            unsafe {
597                libc::close(in_fd);
598                libc::close(out_fd);
599            }
600            return Some(Err(err));
601        }
602    }
603
604    let mut stats = DdStats::default();
605    let bs = config.ibs;
606    let mut ibuf = vec![0u8; bs];
607    let count_limit = config.count;
608
609    loop {
610        if let Some(limit) = count_limit {
611            if stats.records_in_full + stats.records_in_partial >= limit {
612                break;
613            }
614        }
615
616        // Raw read — retry on EINTR, loop for full block
617        let mut total_read = 0usize;
618        let mut read_error = false;
619        while total_read < bs {
620            let ret = unsafe {
621                libc::read(
622                    in_fd,
623                    ibuf[total_read..].as_mut_ptr() as *mut _,
624                    bs - total_read,
625                )
626            };
627            if ret > 0 {
628                total_read += ret as usize;
629            } else if ret == 0 {
630                break; // EOF
631            } else {
632                let err = io::Error::last_os_error();
633                if err.kind() == io::ErrorKind::Interrupted {
634                    continue;
635                }
636                if config.conv.noerror {
637                    eprintln!("dd: error reading '{}': {}", in_path, err);
638                    read_error = true;
639                    break;
640                }
641                unsafe {
642                    libc::close(in_fd);
643                    libc::close(out_fd);
644                }
645                return Some(Err(err));
646            }
647        }
648
649        // conv=noerror: skip entire bad block (GNU behavior)
650        if read_error {
651            stats.records_in_partial += 1;
652            continue;
653        }
654
655        if total_read == 0 {
656            break;
657        }
658
659        if total_read == bs {
660            stats.records_in_full += 1;
661        } else {
662            stats.records_in_partial += 1;
663        }
664
665        // Raw write — retry on EINTR, treat write(0) as error
666        let mut written = 0usize;
667        while written < total_read {
668            let ret = unsafe {
669                libc::write(
670                    out_fd,
671                    ibuf[written..].as_ptr() as *const _,
672                    total_read - written,
673                )
674            };
675            if ret > 0 {
676                written += ret as usize;
677            } else if ret == 0 {
678                // write() returning 0 is abnormal — treat as error
679                unsafe {
680                    libc::close(in_fd);
681                    libc::close(out_fd);
682                }
683                return Some(Err(io::Error::new(
684                    io::ErrorKind::WriteZero,
685                    "write returned 0",
686                )));
687            } else {
688                let err = io::Error::last_os_error();
689                if err.kind() == io::ErrorKind::Interrupted {
690                    continue;
691                }
692                unsafe {
693                    libc::close(in_fd);
694                    libc::close(out_fd);
695                }
696                return Some(Err(err));
697            }
698        }
699
700        stats.bytes_copied += written as u64;
701        if written == bs {
702            stats.records_out_full += 1;
703        } else {
704            stats.records_out_partial += 1;
705        }
706    }
707
708    // fsync / fdatasync — propagate errors
709    if config.conv.fsync {
710        if unsafe { libc::fsync(out_fd) } < 0 {
711            let err = io::Error::last_os_error();
712            unsafe {
713                libc::close(in_fd);
714                libc::close(out_fd);
715            }
716            return Some(Err(err));
717        }
718    } else if config.conv.fdatasync {
719        if unsafe { libc::fdatasync(out_fd) } < 0 {
720            let err = io::Error::last_os_error();
721            unsafe {
722                libc::close(in_fd);
723                libc::close(out_fd);
724            }
725            return Some(Err(err));
726        }
727    }
728
729    unsafe { libc::close(in_fd) };
730    // Check close(out_fd) — on NFS, close can report deferred write errors
731    if unsafe { libc::close(out_fd) } < 0 {
732        return Some(Err(io::Error::last_os_error()));
733    }
734
735    if config.status != StatusLevel::None {
736        print_stats(&stats, start_time.elapsed(), config.status);
737    }
738
739    Some(Ok(stats))
740}
741
742/// Fast path: use copy_file_range when both input and output are files
743/// and no conversions are needed. This is zero-copy in the kernel.
744#[cfg(target_os = "linux")]
745fn try_copy_file_range_dd(config: &DdConfig) -> Option<io::Result<DdStats>> {
746    // Only usable when both are files, no conversions, and ibs == obs
747    if config.input.is_none() || config.output.is_none() {
748        return None;
749    }
750    if has_conversions(&config.conv) || config.ibs != config.obs {
751        return None;
752    }
753
754    let start_time = Instant::now();
755    let in_path = config.input.as_ref().unwrap();
756    let out_path = config.output.as_ref().unwrap();
757
758    let in_file = match File::open(in_path) {
759        Ok(f) => f,
760        Err(e) => return Some(Err(e)),
761    };
762
763    // Hint kernel for sequential readahead
764    {
765        use std::os::unix::io::AsRawFd;
766        unsafe {
767            libc::posix_fadvise(in_file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
768        }
769    }
770
771    let mut out_opts = OpenOptions::new();
772    out_opts.write(true);
773    if config.conv.excl {
774        out_opts.create_new(true);
775    } else if !config.conv.nocreat {
776        out_opts.create(true);
777    }
778    if !config.conv.notrunc && !config.conv.excl {
779        out_opts.truncate(true);
780    }
781
782    let out_file = match out_opts.open(out_path) {
783        Ok(f) => f,
784        Err(e) => return Some(Err(e)),
785    };
786
787    use std::os::unix::io::AsRawFd;
788    let in_fd = in_file.as_raw_fd();
789    let out_fd = out_file.as_raw_fd();
790
791    // Handle skip
792    let skip_bytes = config.skip * config.ibs as u64;
793    let seek_bytes = config.seek * config.obs as u64;
794    let mut in_off: i64 = skip_bytes as i64;
795    let mut out_off: i64 = seek_bytes as i64;
796
797    let mut stats = DdStats::default();
798    let block_size = config.ibs;
799
800    // Determine total bytes to copy
801    let total_to_copy = config.count.map(|count| count * block_size as u64);
802
803    let mut bytes_remaining = total_to_copy;
804    loop {
805        let chunk = match bytes_remaining {
806            Some(0) => break,
807            Some(r) => r.min(block_size as u64 * 1024) as usize, // copy in large chunks
808            None => block_size * 1024,
809        };
810
811        // SAFETY: in_fd and out_fd are valid file descriptors (files are open for the
812        // lifetime of this function). in_off and out_off are valid, aligned i64 pointers
813        // with no aliasing. The kernel updates offsets atomically. Return value is checked:
814        // negative = error, 0 = EOF, positive = bytes copied.
815        let ret = unsafe {
816            libc::syscall(
817                libc::SYS_copy_file_range,
818                in_fd,
819                &mut in_off as *mut i64,
820                out_fd,
821                &mut out_off as *mut i64,
822                chunk,
823                0u32,
824            )
825        };
826
827        if ret < 0 {
828            let err = io::Error::last_os_error();
829            if err.raw_os_error() == Some(libc::EINVAL)
830                || err.raw_os_error() == Some(libc::ENOSYS)
831                || err.raw_os_error() == Some(libc::EXDEV)
832            {
833                return None; // Fall back to regular copy
834            }
835            return Some(Err(err));
836        }
837        if ret == 0 {
838            break;
839        }
840
841        let copied = ret as u64;
842        stats.bytes_copied += copied;
843
844        // Track block stats
845        let full_blocks = copied / block_size as u64;
846        let partial = copied % block_size as u64;
847        stats.records_in_full += full_blocks;
848        stats.records_out_full += full_blocks;
849        if partial > 0 {
850            stats.records_in_partial += 1;
851            stats.records_out_partial += 1;
852        }
853
854        if let Some(ref mut r) = bytes_remaining {
855            *r = r.saturating_sub(copied);
856        }
857    }
858
859    // fsync / fdatasync
860    if config.conv.fsync {
861        if let Err(e) = out_file.sync_all() {
862            return Some(Err(e));
863        }
864    } else if config.conv.fdatasync {
865        if let Err(e) = out_file.sync_data() {
866            return Some(Err(e));
867        }
868    }
869
870    if config.status != StatusLevel::None {
871        print_stats(&stats, start_time.elapsed(), config.status);
872    }
873
874    Some(Ok(stats))
875}
876
877/// Perform the dd copy operation.
878pub fn dd_copy(config: &DdConfig) -> io::Result<DdStats> {
879    // Try zero-copy fast path on Linux (file-to-file)
880    #[cfg(target_os = "linux")]
881    {
882        if let Some(result) = try_copy_file_range_dd(config) {
883            return result;
884        }
885    }
886    // Raw syscall fast path: handles devices like /dev/zero where copy_file_range fails
887    #[cfg(target_os = "linux")]
888    {
889        if let Some(result) = try_raw_dd(config) {
890            return result;
891        }
892    }
893    let start_time = Instant::now();
894
895    // Only clone file handles when skip/seek are needed (avoids dup() syscalls otherwise)
896    let needs_input_seek = config.skip > 0;
897    let needs_output_seek = config.seek > 0;
898
899    let mut input_file: Option<File> = None;
900    let mut input: Box<dyn Read> = if let Some(ref path) = config.input {
901        let file = File::open(path)
902            .map_err(|e| io::Error::new(e.kind(), format!("failed to open '{}': {}", path, e)))?;
903        // Hint kernel for sequential readahead
904        #[cfg(target_os = "linux")]
905        {
906            use std::os::unix::io::AsRawFd;
907            unsafe {
908                libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
909            }
910        }
911        if needs_input_seek {
912            input_file = Some(file.try_clone()?);
913        }
914        Box::new(file)
915    } else {
916        Box::new(io::stdin())
917    };
918
919    // Handle output file creation/opening
920    let mut output_file: Option<File> = None;
921    let mut output: Box<dyn Write> = if let Some(ref path) = config.output {
922        let mut opts = OpenOptions::new();
923        opts.write(true);
924
925        if config.conv.excl {
926            // excl: fail if file exists (create_new implies create)
927            opts.create_new(true);
928        } else if config.conv.nocreat {
929            // nocreat: do not create, file must exist
930            // Don't set create at all
931        } else {
932            opts.create(true);
933        }
934
935        if config.conv.notrunc {
936            opts.truncate(false);
937        } else if !config.conv.excl {
938            // Default: truncate (but not with excl since create_new starts fresh)
939            opts.truncate(true);
940        }
941
942        let file = opts
943            .open(path)
944            .map_err(|e| io::Error::new(e.kind(), format!("failed to open '{}': {}", path, e)))?;
945        if needs_output_seek || config.conv.fsync || config.conv.fdatasync {
946            // Clone for: (1) seek positioning (Box<dyn Write> can't seek directly),
947            // and (2) sync_all/sync_data at end. Safe because dup()-cloned fds
948            // share the same open file description.
949            output_file = Some(file.try_clone()?);
950        }
951        Box::new(file)
952    } else {
953        Box::new(io::stdout())
954    };
955
956    // Skip input — use seek() for file inputs to avoid reading and discarding data
957    if config.skip > 0 {
958        if config.iflag.skip_bytes {
959            // skip_bytes: skip N bytes, not N blocks
960            if let Some(ref mut f) = input_file {
961                f.seek(SeekFrom::Start(config.skip))?;
962                let seeked = f.try_clone()?;
963                input = Box::new(seeked);
964            } else {
965                skip_input_bytes(&mut input, config.skip)?;
966            }
967        } else if let Some(ref mut f) = input_file {
968            skip_input_seek(f, config.skip, config.ibs)?;
969            // Rebuild the input Box with a clone at the seeked position
970            let seeked = f.try_clone()?;
971            input = Box::new(seeked);
972        } else {
973            skip_input(&mut input, config.skip, config.ibs)?;
974        }
975    }
976
977    // Seek output blocks
978    if config.seek > 0 {
979        if let Some(ref mut f) = output_file {
980            seek_output_file(f, config.seek, config.obs)?;
981            // Rebuild the output Box with a new clone at the seeked position
982            let seeked = f.try_clone()?;
983            output = Box::new(seeked);
984        } else {
985            seek_output(&mut output, config.seek, config.obs)?;
986        }
987    }
988
989    let mut stats = DdStats::default();
990    let mut ibuf = vec![0u8; config.ibs];
991    let mut obuf: Vec<u8> = Vec::with_capacity(config.obs);
992    let mut unblock_buf: Vec<u8> = Vec::new();
993    // For count_bytes mode, track total bytes read
994    let mut bytes_read_total: u64 = 0;
995
996    loop {
997        // Check count limit
998        if let Some(count) = config.count {
999            if config.iflag.count_bytes {
1000                if bytes_read_total >= count {
1001                    break;
1002                }
1003            } else if stats.records_in_full + stats.records_in_partial >= count {
1004                break;
1005            }
1006        }
1007
1008        // When count_bytes is active, limit the read to the remaining bytes
1009        let read_size = if config.iflag.count_bytes {
1010            if let Some(count) = config.count {
1011                let remaining = count.saturating_sub(bytes_read_total);
1012                std::cmp::min(config.ibs, remaining as usize)
1013            } else {
1014                config.ibs
1015            }
1016        } else {
1017            config.ibs
1018        };
1019        if read_size == 0 {
1020            break;
1021        }
1022
1023        // Read one input block
1024        let n = match read_full_block(&mut input, &mut ibuf[..read_size]) {
1025            Ok(n) => n,
1026            Err(e) => {
1027                if config.conv.noerror {
1028                    if config.status != StatusLevel::None {
1029                        eprintln!("dd: error reading input: {}", e);
1030                    }
1031                    // On noerror with sync, fill the entire block with NULs
1032                    if config.conv.sync {
1033                        ibuf.fill(0);
1034                        config.ibs
1035                    } else {
1036                        continue;
1037                    }
1038                } else {
1039                    return Err(e);
1040                }
1041            }
1042        };
1043
1044        if n == 0 {
1045            break;
1046        }
1047
1048        bytes_read_total += n as u64;
1049
1050        // Track full vs partial blocks
1051        if n == config.ibs {
1052            stats.records_in_full += 1;
1053        } else {
1054            stats.records_in_partial += 1;
1055            // Pad if conv=sync: spaces for block/unblock, NULs otherwise
1056            if config.conv.sync {
1057                let pad_byte = if config.conv.block || config.conv.unblock {
1058                    b' '
1059                } else {
1060                    0u8
1061                };
1062                ibuf[n..config.ibs].fill(pad_byte);
1063            }
1064        }
1065
1066        // Determine the data slice to use and apply conversions in-place
1067        let effective_len = if config.conv.sync { config.ibs } else { n };
1068        apply_conversions(&mut ibuf[..effective_len], &config.conv);
1069
1070        // Apply unblock conversion: split fixed-length records into
1071        // newline-terminated records with trailing spaces stripped
1072        let write_data: &[u8] = if config.conv.unblock && config.cbs > 0 {
1073            unblock_buf.clear();
1074            let data = &ibuf[..effective_len];
1075            let mut pos = 0;
1076            while pos < data.len() {
1077                let end = std::cmp::min(pos + config.cbs, data.len());
1078                let record = &data[pos..end];
1079                // Strip trailing spaces
1080                let trimmed_len = record
1081                    .iter()
1082                    .rposition(|&b| b != b' ')
1083                    .map(|p| p + 1)
1084                    .unwrap_or(0);
1085                unblock_buf.extend_from_slice(&record[..trimmed_len]);
1086                unblock_buf.push(b'\n');
1087                pos = end;
1088            }
1089            &unblock_buf
1090        } else {
1091            &ibuf[..effective_len]
1092        };
1093
1094        // Buffer output and flush when we have enough for a full output block.
1095        // Use efficient buffer management: write directly from ibuf when possible,
1096        // only buffer when ibs != obs.
1097        let wd_len = write_data.len();
1098        if config.ibs == config.obs && obuf.is_empty() && !config.conv.unblock {
1099            // Fast path: ibs == obs, write directly
1100            output.write_all(write_data)?;
1101            if wd_len == config.obs {
1102                stats.records_out_full += 1;
1103            } else {
1104                stats.records_out_partial += 1;
1105            }
1106            stats.bytes_copied += wd_len as u64;
1107            // Skip the drain loop below since we wrote directly
1108            continue;
1109        }
1110
1111        // Append write_data to obuf and drain full output blocks.
1112        // We write directly from write_data when possible to avoid copying
1113        // through obuf. Only buffer the remainder that doesn't fill a block.
1114        let obs = config.obs;
1115        let mut wd_off = 0;
1116
1117        // If obuf has leftover bytes, try to complete a full block
1118        if !obuf.is_empty() {
1119            let need = obs - obuf.len();
1120            if write_data.len() >= need {
1121                obuf.extend_from_slice(&write_data[..need]);
1122                output.write_all(&obuf)?;
1123                stats.records_out_full += 1;
1124                stats.bytes_copied += obs as u64;
1125                obuf.clear();
1126                wd_off = need;
1127            } else {
1128                obuf.extend_from_slice(write_data);
1129                wd_off = write_data.len();
1130            }
1131        }
1132
1133        // Write full blocks directly from write_data (zero-copy)
1134        let remaining_wd = &write_data[wd_off..];
1135        let full_blocks = remaining_wd.len() / obs;
1136        if full_blocks > 0 {
1137            let full_len = full_blocks * obs;
1138            output.write_all(&remaining_wd[..full_len])?;
1139            stats.records_out_full += full_blocks as u64;
1140            stats.bytes_copied += full_len as u64;
1141            wd_off += full_len;
1142        }
1143
1144        // Buffer any remaining partial block
1145        let leftover = &write_data[wd_off..];
1146        if !leftover.is_empty() {
1147            obuf.extend_from_slice(leftover);
1148        }
1149    }
1150
1151    // Flush remaining partial output block
1152    if !obuf.is_empty() {
1153        output.write_all(&obuf)?;
1154        stats.records_out_partial += 1;
1155        stats.bytes_copied += obuf.len() as u64;
1156    }
1157
1158    // Flush output
1159    output.flush()?;
1160
1161    // fsync / fdatasync (output_file is Some when seek or sync was requested)
1162    if let Some(ref f) = output_file {
1163        if config.conv.fsync {
1164            f.sync_all()?;
1165        } else if config.conv.fdatasync {
1166            f.sync_data()?;
1167        }
1168    }
1169
1170    let elapsed = start_time.elapsed();
1171
1172    // Print status
1173    if config.status != StatusLevel::None {
1174        print_stats(&stats, elapsed, config.status);
1175    }
1176
1177    Ok(stats)
1178}
1179
1180/// Print dd transfer statistics to stderr.
1181fn print_stats(stats: &DdStats, elapsed: std::time::Duration, status: StatusLevel) {
1182    eprintln!(
1183        "{}+{} records in",
1184        stats.records_in_full, stats.records_in_partial
1185    );
1186    eprintln!(
1187        "{}+{} records out",
1188        stats.records_out_full, stats.records_out_partial
1189    );
1190
1191    if status == StatusLevel::NoXfer {
1192        return;
1193    }
1194
1195    let secs = elapsed.as_secs_f64();
1196    if secs > 0.0 {
1197        let rate = stats.bytes_copied as f64 / secs;
1198        eprintln!(
1199            "{} bytes copied, {:.6} s, {}/s",
1200            stats.bytes_copied,
1201            secs,
1202            human_size(rate as u64)
1203        );
1204    } else {
1205        eprintln!("{} bytes copied", stats.bytes_copied);
1206    }
1207}
1208
/// Format a byte count as a human-readable string (e.g., "1.5 MB").
///
/// Units are decimal (powers of 1000): B, kB, MB, GB, TB, PB, EB. Values that
/// are whole in the chosen unit are printed without a fraction ("1 kB");
/// everything else is printed with one decimal place ("1.5 kB").
///
/// A value that would round up to "1000.0" in one unit is promoted to the next
/// unit, so 999_960 bytes is shown as "1.0 MB" rather than "1000.0 kB".
fn human_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "kB", "MB", "GB", "TB", "PB", "EB"];
    let mut size = bytes as f64;
    let mut idx = 0;

    // Walk every unit except the largest; the largest is handled after the
    // loop because there is nothing to promote to.
    while idx + 1 < UNITS.len() {
        if size < 1000.0 {
            if size == size.floor() {
                return format!("{} {}", size as u64, UNITS[idx]);
            }
            let text = format!("{:.1}", size);
            // Check the rendered text rather than re-deriving the formatter's
            // rounding: "1000.0" means the value belongs in the next unit.
            if text != "1000.0" {
                return format!("{} {}", text, UNITS[idx]);
            }
        }
        size /= 1000.0;
        idx += 1;
    }

    if size == size.floor() {
        format!("{} {}", size as u64, UNITS[idx])
    } else {
        format!("{:.1} {}", size, UNITS[idx])
    }
}
1224
/// Print help message for dd.
///
/// The usage text is written to stderr with `eprint!`. It lists the operands,
/// the size suffixes, and the accepted `conv=` and `iflag=`/`oflag=` symbols.
pub fn print_help() {
    // The status level that suppresses only the final transfer statistics is
    // `noxfer` (see `StatusLevel::NoXfer`), not `noerror`.
    eprint!(
        "\
Usage: dd [OPERAND]...
  or:  dd OPTION
Copy a file, converting and formatting according to the operands.

  bs=BYTES        read and write up to BYTES bytes at a time (default: 512)
  cbs=BYTES       convert BYTES bytes at a time
  conv=CONVS      convert the file as per the comma separated symbol list
  count=N         copy only N input blocks
  ibs=BYTES       read up to BYTES bytes at a time (default: 512)
  if=FILE         read from FILE instead of stdin
  iflag=FLAGS     read as per the comma separated symbol list
  obs=BYTES       write BYTES bytes at a time (default: 512)
  of=FILE         write to FILE instead of stdout
  oflag=FLAGS     write as per the comma separated symbol list
  seek=N          skip N obs-sized blocks at start of output
  skip=N          skip N ibs-sized blocks at start of input
  status=LEVEL    LEVEL of information to print to stderr;
                  'none' suppresses everything but error messages,
                  'noxfer' suppresses the final transfer statistics,
                  'progress' shows periodic transfer statistics

  BLOCKS and BYTES may be followed by the following multiplicative suffixes:
  c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024,
  GB=1000*1000*1000, GiB=1024*1024*1024, and so on for T, P, E.

Each CONV symbol may be:

  lcase     change upper case to lower case
  ucase     change lower case to upper case
  swab      swap every pair of input bytes
  sync      pad every input block with NULs to ibs-size
  noerror   continue after read errors
  notrunc   do not truncate the output file
  fdatasync physically write output file data before finishing
  fsync     likewise, but also write metadata
  excl      fail if the output file already exists
  nocreat   do not create the output file

Each FLAG symbol may be:

  append    append mode (makes sense only for output; conv=notrunc suggested)
  direct    use direct I/O for data
  directory fail unless a directory
  dsync     use synchronized I/O for data
  sync      likewise, but also for metadata
  fullblock accumulate full blocks of input (iflag only)
  nonblock  use non-blocking I/O
  noatime   do not update access time
  nocache   Request to drop cache
  noctty    do not assign controlling terminal from file
  nofollow  do not follow symlinks
  count_bytes  treat 'count=N' as a byte count (iflag only)
  skip_bytes   treat 'skip=N' as a byte count (iflag only)

  --help     display this help and exit
  --version  output version information and exit
"
    );
}
1288
1289/// Print version information for dd.
1290pub fn print_version() {
1291    eprintln!("dd (fcoreutils) {}", env!("CARGO_PKG_VERSION"));
1292}