Skip to main content

coreutils_rs/dd/
core.rs

1use std::fs::{File, OpenOptions};
2use std::io::{self, Read, Seek, SeekFrom, Write};
3use std::time::Instant;
4
/// Status output level for dd.
///
/// Chosen with the `status=` operand. When the operand is absent the
/// configuration keeps the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusLevel {
    /// Print transfer stats at end (default).
    #[default]
    Default,
    /// No informational messages to stderr (`status=none`).
    None,
    /// Print periodic transfer stats (like GNU dd `status=progress`).
    Progress,
    /// Like default but also suppress error messages (`status=noerror`).
    NoError,
    /// Print record counts but suppress transfer speed/bytes line (`status=noxfer`).
    NoXfer,
}
20
/// Conversion flags for dd (`conv=` option).
///
/// Every field starts out `false` and is switched on when the keyword of the
/// same name appears in the comma-separated `conv=` list.
#[derive(Debug, Clone, Default)]
pub struct DdConv {
    /// Convert to lowercase (ASCII only). Rejected together with `ucase`.
    pub lcase: bool,
    /// Convert to uppercase (ASCII only). Rejected together with `lcase`.
    pub ucase: bool,
    /// Swap every pair of input bytes; a trailing odd byte is left in place.
    pub swab: bool,
    /// Continue after read errors.
    pub noerror: bool,
    /// Do not truncate the output file.
    pub notrunc: bool,
    /// Pad every input block with NULs to ibs-size.
    pub sync: bool,
    /// Call fdatasync on output before finishing.
    pub fdatasync: bool,
    /// Call fsync on output before finishing.
    pub fsync: bool,
    /// Fail if the output file already exists. Rejected together with `nocreat`.
    pub excl: bool,
    /// Do not create the output file. Rejected together with `excl`.
    pub nocreat: bool,
    /// Convert fixed-length records to newline-terminated (unblock).
    pub unblock: bool,
    /// Convert newline-terminated records to fixed-length (block).
    pub block: bool,
}
49
/// Input/output flags for dd (`iflag=`/`oflag=` options).
///
/// One instance is kept for the input side and one for the output side.
/// Every field starts out `false` and is switched on when the keyword of the
/// same name appears in the comma-separated flag list.
#[derive(Debug, Clone, Default)]
pub struct DdFlags {
    /// `append` keyword was given.
    pub append: bool,
    /// `direct` keyword was given.
    pub direct: bool,
    /// `directory` keyword was given.
    pub directory: bool,
    /// `dsync` keyword was given.
    pub dsync: bool,
    /// `sync` keyword was given (distinct from `conv=sync`).
    pub sync: bool,
    /// `fullblock` keyword was given. Does not disable the Linux raw fast
    /// path, whose read loop already fills whole blocks.
    pub fullblock: bool,
    /// `nonblock` keyword was given.
    pub nonblock: bool,
    /// `noatime` keyword was given. Does not disable the Linux raw fast path,
    /// which already opens its input with `O_NOATIME`.
    pub noatime: bool,
    /// `nocache` keyword was given.
    pub nocache: bool,
    /// `noctty` keyword was given.
    pub noctty: bool,
    /// `nofollow` keyword was given.
    pub nofollow: bool,
    /// `count_bytes` keyword was given.
    // NOTE(review): presumably makes `count=` a byte count — confirm against the copy loop.
    pub count_bytes: bool,
    /// `skip_bytes` keyword was given; on the input side `skip=` is then
    /// treated as a byte offset rather than a number of blocks.
    pub skip_bytes: bool,
}
67
/// Configuration for a dd operation.
///
/// Produced by `parse_dd_args` from `key=value` operands. The `Default`
/// implementation describes a stdin-to-stdout copy with 512-byte blocks and
/// no conversions or flags.
#[derive(Debug, Clone)]
pub struct DdConfig {
    /// Input file path (None = stdin). Set by `if=`.
    pub input: Option<String>,
    /// Output file path (None = stdout). Set by `of=`.
    pub output: Option<String>,
    /// Input block size in bytes. Set by `ibs=`, or by `bs=` which sets both sizes.
    pub ibs: usize,
    /// Output block size in bytes. Set by `obs=`, or by `bs=` which sets both sizes.
    pub obs: usize,
    /// Conversion block size (for block/unblock). Zero when `cbs=` was not given.
    pub cbs: usize,
    /// Copy only this many input blocks (None = unlimited).
    pub count: Option<u64>,
    /// Skip this many ibs-sized blocks at start of input
    /// (a byte count instead when `iflag.skip_bytes` is set).
    pub skip: u64,
    /// Skip this many obs-sized blocks at start of output.
    pub seek: u64,
    /// Conversion options.
    pub conv: DdConv,
    /// Status output level.
    pub status: StatusLevel,
    /// Input flags.
    pub iflag: DdFlags,
    /// Output flags.
    pub oflag: DdFlags,
}
96
97impl Default for DdConfig {
98    fn default() -> Self {
99        DdConfig {
100            input: None,
101            output: None,
102            ibs: 512,
103            obs: 512,
104            cbs: 0,
105            count: None,
106            skip: 0,
107            seek: 0,
108            conv: DdConv::default(),
109            status: StatusLevel::default(),
110            iflag: DdFlags::default(),
111            oflag: DdFlags::default(),
112        }
113    }
114}
115
/// Statistics from a dd copy operation.
///
/// All counters start at zero (`DdStats::default()`) and are only ever
/// incremented by the copy routines.
#[derive(Debug, Clone, Default)]
pub struct DdStats {
    /// Number of full input blocks read (reads that filled a whole block).
    pub records_in_full: u64,
    /// Number of partial input blocks read (reads that returned less than a block).
    pub records_in_partial: u64,
    /// Number of full output blocks written.
    pub records_out_full: u64,
    /// Number of partial output blocks written.
    pub records_out_partial: u64,
    /// Total bytes copied.
    pub bytes_copied: u64,
}
130
/// Parse a GNU dd SIZE string with optional suffix and `x` multiplier.
///
/// Suffix conventions (matching GNU dd):
///   - Single letter = binary (powers of 1024): k/K, M, G, T, P, E
///   - Letter followed by `B` = decimal (powers of 1000): kB, KB, MB, GB, TB, PB, EB
///   - Letter followed by `iB` = explicit binary: KiB, MiB, GiB, TiB, PiB, EiB
///   - Special: c (1), w (2), b (512)
///
/// The `x` operator multiplies terms. Terms are parsed left to right and the
/// product is accumulated from the right, so `1x2x4` = 1 * (2 * 4) = 8.
/// Leading whitespace before a term and whitespace around the whole string
/// are ignored.
///
/// # Errors
///
/// Returns a message when the string (or any term) is empty, a term does not
/// start with decimal digits, a suffix is unknown, or a product exceeds
/// `u64::MAX`.
pub fn parse_size(s: &str) -> Result<u64, String> {
    let s = s.trim();
    if s.is_empty() {
        return Err("empty size string".to_string());
    }

    // Parse every term up front (first failing term wins). This is done
    // iteratively so that a very long `1x1x1x...` operand cannot exhaust the
    // stack the way one recursive call per `x` would.
    let terms = s
        .split('x')
        .map(|term| parse_size_single(term.trim_start()))
        .collect::<Result<Vec<u64>, String>>()?;

    // Multiply right-to-left so overflow is detected (and reported) on the
    // same operand pair as a right-nested evaluation.
    match terms.split_last() {
        Some((&last, leading)) => leading.iter().rev().try_fold(last, |right, &left| {
            left.checked_mul(right)
                .ok_or_else(|| format!("size overflow: {} * {}", left, right))
        }),
        // `split` always yields at least one item; kept for totality.
        None => Err("empty size string".to_string()),
    }
}

/// Parse one multiplication-free term: decimal digits followed by an
/// optional unit suffix.
///
/// # Errors
///
/// Returns a message when `s` is empty, does not begin with a digit, the
/// digits do not fit in `u64`, the suffix is not recognised, or the scaled
/// value overflows `u64`.
fn parse_size_single(s: &str) -> Result<u64, String> {
    if s.is_empty() {
        return Err("empty size string".to_string());
    }

    // Find where the numeric part ends
    let num_end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());

    if num_end == 0 {
        return Err(format!("invalid number: '{}'", s));
    }

    let (digits, suffix) = s.split_at(num_end);
    let num: u64 = digits
        .parse()
        .map_err(|e| format!("invalid number '{}': {}", digits, e))?;

    // GNU dd suffix convention: single letter = binary (powers of 1024),
    // trailing `B` = decimal (powers of 1000), trailing `iB` = binary (explicit).
    let multiplier: u64 = match suffix {
        "" => 1,
        "c" => 1,
        "w" => 2,
        "b" => 512,
        "k" | "K" => 1024,
        "kB" | "KB" => 1000,
        "KiB" => 1024,
        "M" => 1_048_576,
        "MB" => 1_000_000,
        "MiB" => 1_048_576,
        "G" => 1_073_741_824,
        "GB" => 1_000_000_000,
        "GiB" => 1_073_741_824,
        "T" => 1_099_511_627_776,
        "TB" => 1_000_000_000_000,
        "TiB" => 1_099_511_627_776,
        "P" => 1_125_899_906_842_624,
        "PB" => 1_000_000_000_000_000,
        "PiB" => 1_125_899_906_842_624,
        "E" => 1_152_921_504_606_846_976,
        "EB" => 1_000_000_000_000_000_000,
        "EiB" => 1_152_921_504_606_846_976,
        _ => return Err(format!("invalid suffix: '{}'", suffix)),
    };

    num.checked_mul(multiplier)
        .ok_or_else(|| format!("size overflow: {} * {}", num, multiplier))
}
208
209/// Parse dd command-line arguments (key=value pairs).
210pub fn parse_dd_args(args: &[String]) -> Result<DdConfig, String> {
211    let mut config = DdConfig::default();
212    let mut bs_set = false;
213
214    for arg in args {
215        if let Some((key, value)) = arg.split_once('=') {
216            match key {
217                "if" => config.input = Some(value.to_string()),
218                "of" => config.output = Some(value.to_string()),
219                "bs" => {
220                    let size = parse_size(value)? as usize;
221                    config.ibs = size;
222                    config.obs = size;
223                    bs_set = true;
224                }
225                "ibs" => {
226                    if !bs_set {
227                        config.ibs = parse_size(value)? as usize;
228                    }
229                }
230                "obs" => {
231                    if !bs_set {
232                        config.obs = parse_size(value)? as usize;
233                    }
234                }
235                "cbs" => config.cbs = parse_size(value)? as usize,
236                "count" => config.count = Some(parse_size(value)?),
237                "skip" => config.skip = parse_size(value)?,
238                "seek" => config.seek = parse_size(value)?,
239                "conv" => {
240                    for flag in value.split(',') {
241                        match flag {
242                            "lcase" => config.conv.lcase = true,
243                            "ucase" => config.conv.ucase = true,
244                            "swab" => config.conv.swab = true,
245                            "noerror" => config.conv.noerror = true,
246                            "notrunc" => config.conv.notrunc = true,
247                            "sync" => config.conv.sync = true,
248                            "fdatasync" => config.conv.fdatasync = true,
249                            "fsync" => config.conv.fsync = true,
250                            "excl" => config.conv.excl = true,
251                            "nocreat" => config.conv.nocreat = true,
252                            "block" => config.conv.block = true,
253                            "unblock" => config.conv.unblock = true,
254                            "" => {}
255                            _ => return Err(format!("invalid conversion: '{}'", flag)),
256                        }
257                    }
258                }
259                "iflag" => {
260                    for flag in value.split(',') {
261                        parse_flag(flag, &mut config.iflag)?;
262                    }
263                }
264                "oflag" => {
265                    for flag in value.split(',') {
266                        parse_flag(flag, &mut config.oflag)?;
267                    }
268                }
269                "status" => {
270                    config.status = match value {
271                        "none" => StatusLevel::None,
272                        "noxfer" => StatusLevel::NoXfer,
273                        "noerror" => StatusLevel::NoError,
274                        "progress" => StatusLevel::Progress,
275                        _ => return Err(format!("invalid status level: '{}'", value)),
276                    };
277                }
278                _ => return Err(format!("unrecognized operand: '{}'", arg)),
279            }
280        } else {
281            return Err(format!("unrecognized operand: '{}'", arg));
282        }
283    }
284
285    // Validate conflicting options
286    if config.conv.lcase && config.conv.ucase {
287        return Err("conv=lcase and conv=ucase are mutually exclusive".to_string());
288    }
289    if config.conv.excl && config.conv.nocreat {
290        return Err("conv=excl and conv=nocreat are mutually exclusive".to_string());
291    }
292
293    Ok(config)
294}
295
296/// Parse a single iflag/oflag value into the DdFlags struct.
297fn parse_flag(flag: &str, flags: &mut DdFlags) -> Result<(), String> {
298    match flag {
299        "append" => flags.append = true,
300        "direct" => flags.direct = true,
301        "directory" => flags.directory = true,
302        "dsync" => flags.dsync = true,
303        "sync" => flags.sync = true,
304        "fullblock" => flags.fullblock = true,
305        "nonblock" => flags.nonblock = true,
306        "noatime" => flags.noatime = true,
307        "nocache" => flags.nocache = true,
308        "noctty" => flags.noctty = true,
309        "nofollow" => flags.nofollow = true,
310        "count_bytes" => flags.count_bytes = true,
311        "skip_bytes" => flags.skip_bytes = true,
312        "" => {}
313        _ => return Err(format!("invalid flag: '{}'", flag)),
314    }
315    Ok(())
316}
317
/// Fill `buf` from `reader`, issuing further reads after short ones.
///
/// Stops when the buffer is full or the reader reports end of input, and
/// returns how many bytes were stored (0 means EOF, or an empty `buf`).
/// Reads that fail with `ErrorKind::Interrupted` are retried; any other error
/// is returned immediately.
fn read_full_block(reader: &mut dyn Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0usize;
    loop {
        let vacant = &mut buf[filled..];
        if vacant.is_empty() {
            return Ok(filled);
        }
        match reader.read(vacant) {
            Ok(0) => return Ok(filled),
            Ok(got) => filled += got,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }
}
332
333/// Apply conversion options to a data block in-place.
334pub fn apply_conversions(data: &mut [u8], conv: &DdConv) {
335    if conv.swab {
336        // Swap every pair of bytes
337        let pairs = data.len() / 2;
338        for i in 0..pairs {
339            data.swap(i * 2, i * 2 + 1);
340        }
341    }
342
343    if conv.lcase {
344        for b in data.iter_mut() {
345            b.make_ascii_lowercase();
346        }
347    } else if conv.ucase {
348        for b in data.iter_mut() {
349            b.make_ascii_uppercase();
350        }
351    }
352}
353
354/// Skip input blocks by reading and discarding them.
355fn skip_input(reader: &mut dyn Read, blocks: u64, block_size: usize) -> io::Result<()> {
356    let mut discard_buf = vec![0u8; block_size];
357    for _ in 0..blocks {
358        let n = read_full_block(reader, &mut discard_buf)?;
359        if n == 0 {
360            break;
361        }
362    }
363    Ok(())
364}
365
/// Skip input by reading and discarding up to `bytes` bytes.
///
/// Data is pulled through a fixed 8 KiB scratch buffer. Skipping stops early,
/// without error, if the reader reaches end of input first. Reads that fail
/// with `ErrorKind::Interrupted` are retried (matching `read_full_block`);
/// any other read error is returned.
fn skip_input_bytes(reader: &mut dyn Read, bytes: u64) -> io::Result<()> {
    let mut remaining = bytes;
    let mut discard_buf = [0u8; 8192];
    while remaining > 0 {
        // Never ask for more than is left, so no byte past the skip is consumed.
        let chunk = std::cmp::min(remaining, discard_buf.len() as u64) as usize;
        match reader.read(&mut discard_buf[..chunk]) {
            Ok(0) => break,
            Ok(n) => remaining = remaining.saturating_sub(n as u64),
            // A signal arriving mid-read is not a failure; just try again.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
380
/// Skip input blocks by seeking (for seekable file inputs).
///
/// Positions `file` at absolute byte offset `blocks * block_size`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when the byte offset does not fit in
/// `u64` (instead of panicking or wrapping), or the underlying seek error.
fn skip_input_seek(file: &mut File, blocks: u64, block_size: usize) -> io::Result<()> {
    let offset = blocks
        .checked_mul(block_size as u64)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "skip offset overflow"))?;
    file.seek(SeekFrom::Start(offset))?;
    Ok(())
}
387
/// Advance a non-seekable output by emitting zero-filled blocks.
///
/// Writes `seek_blocks` blocks of `block_size` NUL bytes to `writer`. No
/// actual seek is attempted: the writer is only known as `dyn Write`, so
/// padding is the general-purpose substitute. The first write error ends the
/// padding and is returned.
fn seek_output(writer: &mut Box<dyn Write>, seek_blocks: u64, block_size: usize) -> io::Result<()> {
    let padding = vec![0u8; block_size];
    (0..seek_blocks).try_for_each(|_| writer.write_all(&padding))
}
398
/// Seek output on a file using actual file seeking.
///
/// Positions `file` at absolute byte offset `seek_blocks * block_size`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when the byte offset does not fit in
/// `u64` (instead of panicking or wrapping), or the underlying seek error.
fn seek_output_file(file: &mut File, seek_blocks: u64, block_size: usize) -> io::Result<()> {
    let offset = seek_blocks
        .checked_mul(block_size as u64)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "seek offset overflow"))?;
    file.seek(SeekFrom::Start(offset))?;
    Ok(())
}
405
406/// Check if any data conversion options are enabled.
407#[cfg(target_os = "linux")]
408fn has_conversions(conv: &DdConv) -> bool {
409    conv.lcase || conv.ucase || conv.swab || conv.sync || conv.block || conv.unblock
410}
411
412/// Check if any iflag/oflag fields require the generic path.
413/// Note: noatime is excluded because the raw path already uses O_NOATIME.
414/// fullblock is excluded because the raw read loop already reads full blocks.
415#[cfg(target_os = "linux")]
416fn has_flags(flags: &DdFlags) -> bool {
417    flags.append
418        || flags.direct
419        || flags.directory
420        || flags.dsync
421        || flags.sync
422        || flags.nonblock
423        || flags.nocache
424        || flags.noctty
425        || flags.nofollow
426        || flags.count_bytes
427        || flags.skip_bytes
428}
429
430/// Raw-syscall fast path: when both input and output are file paths,
431/// ibs == obs, no conversions, and no iflag/oflag are set, bypass
432/// Box<dyn Read/Write> and use libc::read/write directly. Handles
433/// char devices (e.g. /dev/zero) that copy_file_range can't handle.
434#[cfg(target_os = "linux")]
435fn try_raw_dd(config: &DdConfig) -> Option<io::Result<DdStats>> {
436    if config.input.is_none() || config.output.is_none() {
437        return None;
438    }
439    if has_conversions(&config.conv) || config.ibs != config.obs {
440        return None;
441    }
442    // Bail out if any iflag/oflag is set — we don't apply open() flags here
443    if has_flags(&config.iflag) || has_flags(&config.oflag) {
444        return None;
445    }
446
447    let start_time = Instant::now();
448    let in_path = config.input.as_ref().unwrap();
449    let out_path = config.output.as_ref().unwrap();
450
451    // Build CStrings before opening any FDs to avoid leaks on interior NUL
452    let in_cstr = match std::ffi::CString::new(in_path.as_str()) {
453        Ok(c) => c,
454        Err(_) => {
455            return Some(Err(io::Error::new(
456                io::ErrorKind::InvalidInput,
457                format!("input path contains NUL byte: '{}'", in_path),
458            )));
459        }
460    };
461    let out_cstr = match std::ffi::CString::new(out_path.as_str()) {
462        Ok(c) => c,
463        Err(_) => {
464            return Some(Err(io::Error::new(
465                io::ErrorKind::InvalidInput,
466                format!("output path contains NUL byte: '{}'", out_path),
467            )));
468        }
469    };
470
471    // Open input (O_CLOEXEC prevents FD inheritance in child processes)
472    let in_fd = unsafe {
473        libc::open(
474            in_cstr.as_ptr(),
475            libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOATIME,
476        )
477    };
478    let in_fd = if in_fd < 0 {
479        let first_err = io::Error::last_os_error();
480        if first_err.raw_os_error() == Some(libc::EPERM) {
481            // Retry without O_NOATIME — only EPERM means "file not owned by us"
482            let fd = unsafe { libc::open(in_cstr.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC) };
483            if fd < 0 {
484                return Some(Err(io::Error::last_os_error()));
485            }
486            fd
487        } else {
488            return Some(Err(first_err));
489        }
490    } else {
491        in_fd
492    };
493
494    // Open output (O_CLOEXEC prevents FD inheritance)
495    let mut oflags = libc::O_WRONLY | libc::O_CLOEXEC;
496    if config.conv.excl {
497        oflags |= libc::O_CREAT | libc::O_EXCL;
498    } else if config.conv.nocreat {
499        // don't create
500    } else {
501        oflags |= libc::O_CREAT;
502    }
503    if !config.conv.notrunc && !config.conv.excl {
504        oflags |= libc::O_TRUNC;
505    }
506
507    let out_fd = unsafe { libc::open(out_cstr.as_ptr(), oflags, 0o666 as libc::mode_t) };
508    if out_fd < 0 {
509        unsafe { libc::close(in_fd) };
510        return Some(Err(io::Error::last_os_error()));
511    }
512
513    // Handle skip (seek input) — use checked_mul to prevent overflow
514    if config.skip > 0 {
515        let offset = match (config.skip as u64).checked_mul(config.ibs as u64) {
516            Some(o) if o <= i64::MAX as u64 => o as i64,
517            _ => {
518                unsafe {
519                    libc::close(in_fd);
520                    libc::close(out_fd);
521                }
522                return Some(Err(io::Error::new(
523                    io::ErrorKind::InvalidInput,
524                    "skip offset overflow",
525                )));
526            }
527        };
528        if unsafe { libc::lseek(in_fd, offset, libc::SEEK_SET) } < 0 {
529            // lseek failed (e.g. char device) — read and discard full blocks
530            let mut discard = vec![0u8; config.ibs];
531            'skip: for _ in 0..config.skip {
532                let mut skipped = 0usize;
533                while skipped < config.ibs {
534                    let n = unsafe {
535                        libc::read(
536                            in_fd,
537                            discard[skipped..].as_mut_ptr() as *mut _,
538                            config.ibs - skipped,
539                        )
540                    };
541                    if n > 0 {
542                        skipped += n as usize;
543                    } else if n == 0 {
544                        break 'skip; // EOF
545                    } else {
546                        let err = io::Error::last_os_error();
547                        if err.kind() == io::ErrorKind::Interrupted {
548                            continue;
549                        }
550                        // Non-EINTR error during skip — log and abort skip phase
551                        eprintln!("dd: error skipping input: {}", err);
552                        break 'skip;
553                    }
554                }
555            }
556        }
557    }
558
559    // Handle seek (seek output) — use checked_mul to prevent overflow
560    if config.seek > 0 {
561        let offset = match (config.seek as u64).checked_mul(config.obs as u64) {
562            Some(o) if o <= i64::MAX as u64 => o as i64,
563            _ => {
564                unsafe {
565                    libc::close(in_fd);
566                    libc::close(out_fd);
567                }
568                return Some(Err(io::Error::new(
569                    io::ErrorKind::InvalidInput,
570                    "seek offset overflow",
571                )));
572            }
573        };
574        if unsafe { libc::lseek(out_fd, offset, libc::SEEK_SET) } < 0 {
575            let err = io::Error::last_os_error();
576            unsafe {
577                libc::close(in_fd);
578                libc::close(out_fd);
579            }
580            return Some(Err(err));
581        }
582    }
583
584    let mut stats = DdStats::default();
585    let bs = config.ibs;
586    let mut ibuf = vec![0u8; bs];
587    let count_limit = config.count;
588
589    loop {
590        if let Some(limit) = count_limit {
591            if stats.records_in_full + stats.records_in_partial >= limit {
592                break;
593            }
594        }
595
596        // Raw read — retry on EINTR, loop for full block
597        let mut total_read = 0usize;
598        let mut read_error = false;
599        while total_read < bs {
600            let ret = unsafe {
601                libc::read(
602                    in_fd,
603                    ibuf[total_read..].as_mut_ptr() as *mut _,
604                    bs - total_read,
605                )
606            };
607            if ret > 0 {
608                total_read += ret as usize;
609            } else if ret == 0 {
610                break; // EOF
611            } else {
612                let err = io::Error::last_os_error();
613                if err.kind() == io::ErrorKind::Interrupted {
614                    continue;
615                }
616                if config.conv.noerror {
617                    eprintln!("dd: error reading '{}': {}", in_path, err);
618                    read_error = true;
619                    break;
620                }
621                unsafe {
622                    libc::close(in_fd);
623                    libc::close(out_fd);
624                }
625                return Some(Err(err));
626            }
627        }
628
629        // conv=noerror: skip entire bad block (GNU behavior)
630        if read_error {
631            stats.records_in_partial += 1;
632            continue;
633        }
634
635        if total_read == 0 {
636            break;
637        }
638
639        if total_read == bs {
640            stats.records_in_full += 1;
641        } else {
642            stats.records_in_partial += 1;
643        }
644
645        // Raw write — retry on EINTR, treat write(0) as error
646        let mut written = 0usize;
647        while written < total_read {
648            let ret = unsafe {
649                libc::write(
650                    out_fd,
651                    ibuf[written..].as_ptr() as *const _,
652                    total_read - written,
653                )
654            };
655            if ret > 0 {
656                written += ret as usize;
657            } else if ret == 0 {
658                // write() returning 0 is abnormal — treat as error
659                unsafe {
660                    libc::close(in_fd);
661                    libc::close(out_fd);
662                }
663                return Some(Err(io::Error::new(
664                    io::ErrorKind::WriteZero,
665                    "write returned 0",
666                )));
667            } else {
668                let err = io::Error::last_os_error();
669                if err.kind() == io::ErrorKind::Interrupted {
670                    continue;
671                }
672                unsafe {
673                    libc::close(in_fd);
674                    libc::close(out_fd);
675                }
676                return Some(Err(err));
677            }
678        }
679
680        stats.bytes_copied += written as u64;
681        if written == bs {
682            stats.records_out_full += 1;
683        } else {
684            stats.records_out_partial += 1;
685        }
686    }
687
688    // fsync / fdatasync — propagate errors
689    if config.conv.fsync {
690        if unsafe { libc::fsync(out_fd) } < 0 {
691            let err = io::Error::last_os_error();
692            unsafe {
693                libc::close(in_fd);
694                libc::close(out_fd);
695            }
696            return Some(Err(err));
697        }
698    } else if config.conv.fdatasync {
699        if unsafe { libc::fdatasync(out_fd) } < 0 {
700            let err = io::Error::last_os_error();
701            unsafe {
702                libc::close(in_fd);
703                libc::close(out_fd);
704            }
705            return Some(Err(err));
706        }
707    }
708
709    unsafe { libc::close(in_fd) };
710    // Check close(out_fd) — on NFS, close can report deferred write errors
711    if unsafe { libc::close(out_fd) } < 0 {
712        return Some(Err(io::Error::last_os_error()));
713    }
714
715    if config.status != StatusLevel::None {
716        print_stats(&stats, start_time.elapsed(), config.status);
717    }
718
719    Some(Ok(stats))
720}
721
722/// Fast path: use copy_file_range when both input and output are files
723/// and no conversions are needed. This is zero-copy in the kernel.
724#[cfg(target_os = "linux")]
725fn try_copy_file_range_dd(config: &DdConfig) -> Option<io::Result<DdStats>> {
726    // Only usable when both are files, no conversions, and ibs == obs
727    if config.input.is_none() || config.output.is_none() {
728        return None;
729    }
730    if has_conversions(&config.conv) || config.ibs != config.obs {
731        return None;
732    }
733
734    let start_time = Instant::now();
735    let in_path = config.input.as_ref().unwrap();
736    let out_path = config.output.as_ref().unwrap();
737
738    let in_file = match File::open(in_path) {
739        Ok(f) => f,
740        Err(e) => return Some(Err(e)),
741    };
742
743    let mut out_opts = OpenOptions::new();
744    out_opts.write(true);
745    if config.conv.excl {
746        out_opts.create_new(true);
747    } else if !config.conv.nocreat {
748        out_opts.create(true);
749    }
750    if !config.conv.notrunc && !config.conv.excl {
751        out_opts.truncate(true);
752    }
753
754    let out_file = match out_opts.open(out_path) {
755        Ok(f) => f,
756        Err(e) => return Some(Err(e)),
757    };
758
759    use std::os::unix::io::AsRawFd;
760    let in_fd = in_file.as_raw_fd();
761    let out_fd = out_file.as_raw_fd();
762
763    // Handle skip
764    let skip_bytes = config.skip * config.ibs as u64;
765    let seek_bytes = config.seek * config.obs as u64;
766    let mut in_off: i64 = skip_bytes as i64;
767    let mut out_off: i64 = seek_bytes as i64;
768
769    let mut stats = DdStats::default();
770    let block_size = config.ibs;
771
772    // Determine total bytes to copy
773    let total_to_copy = config.count.map(|count| count * block_size as u64);
774
775    let mut bytes_remaining = total_to_copy;
776    loop {
777        let chunk = match bytes_remaining {
778            Some(0) => break,
779            Some(r) => r.min(block_size as u64 * 1024) as usize, // copy in large chunks
780            None => block_size * 1024,
781        };
782
783        // SAFETY: in_fd and out_fd are valid file descriptors (files are open for the
784        // lifetime of this function). in_off and out_off are valid, aligned i64 pointers
785        // with no aliasing. The kernel updates offsets atomically. Return value is checked:
786        // negative = error, 0 = EOF, positive = bytes copied.
787        let ret = unsafe {
788            libc::syscall(
789                libc::SYS_copy_file_range,
790                in_fd,
791                &mut in_off as *mut i64,
792                out_fd,
793                &mut out_off as *mut i64,
794                chunk,
795                0u32,
796            )
797        };
798
799        if ret < 0 {
800            let err = io::Error::last_os_error();
801            if err.raw_os_error() == Some(libc::EINVAL)
802                || err.raw_os_error() == Some(libc::ENOSYS)
803                || err.raw_os_error() == Some(libc::EXDEV)
804            {
805                return None; // Fall back to regular copy
806            }
807            return Some(Err(err));
808        }
809        if ret == 0 {
810            break;
811        }
812
813        let copied = ret as u64;
814        stats.bytes_copied += copied;
815
816        // Track block stats
817        let full_blocks = copied / block_size as u64;
818        let partial = copied % block_size as u64;
819        stats.records_in_full += full_blocks;
820        stats.records_out_full += full_blocks;
821        if partial > 0 {
822            stats.records_in_partial += 1;
823            stats.records_out_partial += 1;
824        }
825
826        if let Some(ref mut r) = bytes_remaining {
827            *r = r.saturating_sub(copied);
828        }
829    }
830
831    // fsync / fdatasync
832    if config.conv.fsync {
833        if let Err(e) = out_file.sync_all() {
834            return Some(Err(e));
835        }
836    } else if config.conv.fdatasync {
837        if let Err(e) = out_file.sync_data() {
838            return Some(Err(e));
839        }
840    }
841
842    if config.status != StatusLevel::None {
843        print_stats(&stats, start_time.elapsed(), config.status);
844    }
845
846    Some(Ok(stats))
847}
848
849/// Perform the dd copy operation.
850pub fn dd_copy(config: &DdConfig) -> io::Result<DdStats> {
851    // Try zero-copy fast path on Linux (file-to-file)
852    #[cfg(target_os = "linux")]
853    {
854        if let Some(result) = try_copy_file_range_dd(config) {
855            return result;
856        }
857    }
858    // Raw syscall fast path: handles devices like /dev/zero where copy_file_range fails
859    #[cfg(target_os = "linux")]
860    {
861        if let Some(result) = try_raw_dd(config) {
862            return result;
863        }
864    }
865    let start_time = Instant::now();
866
867    // Only clone file handles when skip/seek are needed (avoids dup() syscalls otherwise)
868    let needs_input_seek = config.skip > 0;
869    let needs_output_seek = config.seek > 0;
870
871    let mut input_file: Option<File> = None;
872    let mut input: Box<dyn Read> = if let Some(ref path) = config.input {
873        let file = File::open(path)
874            .map_err(|e| io::Error::new(e.kind(), format!("failed to open '{}': {}", path, e)))?;
875        if needs_input_seek {
876            input_file = Some(file.try_clone()?);
877        }
878        Box::new(file)
879    } else {
880        Box::new(io::stdin())
881    };
882
883    // Handle output file creation/opening
884    let mut output_file: Option<File> = None;
885    let mut output: Box<dyn Write> = if let Some(ref path) = config.output {
886        let mut opts = OpenOptions::new();
887        opts.write(true);
888
889        if config.conv.excl {
890            // excl: fail if file exists (create_new implies create)
891            opts.create_new(true);
892        } else if config.conv.nocreat {
893            // nocreat: do not create, file must exist
894            // Don't set create at all
895        } else {
896            opts.create(true);
897        }
898
899        if config.conv.notrunc {
900            opts.truncate(false);
901        } else if !config.conv.excl {
902            // Default: truncate (but not with excl since create_new starts fresh)
903            opts.truncate(true);
904        }
905
906        let file = opts
907            .open(path)
908            .map_err(|e| io::Error::new(e.kind(), format!("failed to open '{}': {}", path, e)))?;
909        if needs_output_seek || config.conv.fsync || config.conv.fdatasync {
910            // Clone for: (1) seek positioning (Box<dyn Write> can't seek directly),
911            // and (2) sync_all/sync_data at end. Safe because dup()-cloned fds
912            // share the same open file description.
913            output_file = Some(file.try_clone()?);
914        }
915        Box::new(file)
916    } else {
917        Box::new(io::stdout())
918    };
919
920    // Skip input — use seek() for file inputs to avoid reading and discarding data
921    if config.skip > 0 {
922        if config.iflag.skip_bytes {
923            // skip_bytes: skip N bytes, not N blocks
924            if let Some(ref mut f) = input_file {
925                f.seek(SeekFrom::Start(config.skip))?;
926                let seeked = f.try_clone()?;
927                input = Box::new(seeked);
928            } else {
929                skip_input_bytes(&mut input, config.skip)?;
930            }
931        } else if let Some(ref mut f) = input_file {
932            skip_input_seek(f, config.skip, config.ibs)?;
933            // Rebuild the input Box with a clone at the seeked position
934            let seeked = f.try_clone()?;
935            input = Box::new(seeked);
936        } else {
937            skip_input(&mut input, config.skip, config.ibs)?;
938        }
939    }
940
941    // Seek output blocks
942    if config.seek > 0 {
943        if let Some(ref mut f) = output_file {
944            seek_output_file(f, config.seek, config.obs)?;
945            // Rebuild the output Box with a new clone at the seeked position
946            let seeked = f.try_clone()?;
947            output = Box::new(seeked);
948        } else {
949            seek_output(&mut output, config.seek, config.obs)?;
950        }
951    }
952
953    let mut stats = DdStats::default();
954    let mut ibuf = vec![0u8; config.ibs];
955    let mut obuf: Vec<u8> = Vec::with_capacity(config.obs);
956    let mut unblock_buf: Vec<u8> = Vec::new();
957    // For count_bytes mode, track total bytes read
958    let mut bytes_read_total: u64 = 0;
959
960    loop {
961        // Check count limit
962        if let Some(count) = config.count {
963            if config.iflag.count_bytes {
964                if bytes_read_total >= count {
965                    break;
966                }
967            } else if stats.records_in_full + stats.records_in_partial >= count {
968                break;
969            }
970        }
971
972        // When count_bytes is active, limit the read to the remaining bytes
973        let read_size = if config.iflag.count_bytes {
974            if let Some(count) = config.count {
975                let remaining = count.saturating_sub(bytes_read_total);
976                std::cmp::min(config.ibs, remaining as usize)
977            } else {
978                config.ibs
979            }
980        } else {
981            config.ibs
982        };
983        if read_size == 0 {
984            break;
985        }
986
987        // Read one input block
988        let n = match read_full_block(&mut input, &mut ibuf[..read_size]) {
989            Ok(n) => n,
990            Err(e) => {
991                if config.conv.noerror {
992                    if config.status != StatusLevel::None {
993                        eprintln!("dd: error reading input: {}", e);
994                    }
995                    // On noerror with sync, fill the entire block with NULs
996                    if config.conv.sync {
997                        ibuf.fill(0);
998                        config.ibs
999                    } else {
1000                        continue;
1001                    }
1002                } else {
1003                    return Err(e);
1004                }
1005            }
1006        };
1007
1008        if n == 0 {
1009            break;
1010        }
1011
1012        bytes_read_total += n as u64;
1013
1014        // Track full vs partial blocks
1015        if n == config.ibs {
1016            stats.records_in_full += 1;
1017        } else {
1018            stats.records_in_partial += 1;
1019            // Pad if conv=sync: spaces for block/unblock, NULs otherwise
1020            if config.conv.sync {
1021                let pad_byte = if config.conv.block || config.conv.unblock {
1022                    b' '
1023                } else {
1024                    0u8
1025                };
1026                ibuf[n..config.ibs].fill(pad_byte);
1027            }
1028        }
1029
1030        // Determine the data slice to use and apply conversions in-place
1031        let effective_len = if config.conv.sync { config.ibs } else { n };
1032        apply_conversions(&mut ibuf[..effective_len], &config.conv);
1033
1034        // Apply unblock conversion: split fixed-length records into
1035        // newline-terminated records with trailing spaces stripped
1036        let write_data: &[u8] = if config.conv.unblock && config.cbs > 0 {
1037            unblock_buf.clear();
1038            let data = &ibuf[..effective_len];
1039            let mut pos = 0;
1040            while pos < data.len() {
1041                let end = std::cmp::min(pos + config.cbs, data.len());
1042                let record = &data[pos..end];
1043                // Strip trailing spaces
1044                let trimmed_len = record
1045                    .iter()
1046                    .rposition(|&b| b != b' ')
1047                    .map(|p| p + 1)
1048                    .unwrap_or(0);
1049                unblock_buf.extend_from_slice(&record[..trimmed_len]);
1050                unblock_buf.push(b'\n');
1051                pos = end;
1052            }
1053            &unblock_buf
1054        } else {
1055            &ibuf[..effective_len]
1056        };
1057
1058        // Buffer output and flush when we have enough for a full output block.
1059        // Use efficient buffer management: write directly from ibuf when possible,
1060        // only buffer when ibs != obs.
1061        let wd_len = write_data.len();
1062        if config.ibs == config.obs && obuf.is_empty() && !config.conv.unblock {
1063            // Fast path: ibs == obs, write directly
1064            output.write_all(write_data)?;
1065            if wd_len == config.obs {
1066                stats.records_out_full += 1;
1067            } else {
1068                stats.records_out_partial += 1;
1069            }
1070            stats.bytes_copied += wd_len as u64;
1071            // Skip the drain loop below since we wrote directly
1072            continue;
1073        }
1074
1075        obuf.extend_from_slice(write_data);
1076        let mut consumed = 0;
1077        while obuf.len() - consumed >= config.obs {
1078            output.write_all(&obuf[consumed..consumed + config.obs])?;
1079            stats.records_out_full += 1;
1080            stats.bytes_copied += config.obs as u64;
1081            consumed += config.obs;
1082        }
1083        if consumed > 0 {
1084            // Shift remaining bytes to front (more efficient than drain for large buffers)
1085            let remaining = obuf.len() - consumed;
1086            if remaining > 0 {
1087                obuf.copy_within(consumed.., 0);
1088            }
1089            obuf.truncate(remaining);
1090        }
1091    }
1092
1093    // Flush remaining partial output block
1094    if !obuf.is_empty() {
1095        output.write_all(&obuf)?;
1096        stats.records_out_partial += 1;
1097        stats.bytes_copied += obuf.len() as u64;
1098    }
1099
1100    // Flush output
1101    output.flush()?;
1102
1103    // fsync / fdatasync (output_file is Some when seek or sync was requested)
1104    if let Some(ref f) = output_file {
1105        if config.conv.fsync {
1106            f.sync_all()?;
1107        } else if config.conv.fdatasync {
1108            f.sync_data()?;
1109        }
1110    }
1111
1112    let elapsed = start_time.elapsed();
1113
1114    // Print status
1115    if config.status != StatusLevel::None {
1116        print_stats(&stats, elapsed, config.status);
1117    }
1118
1119    Ok(stats)
1120}
1121
1122/// Print dd transfer statistics to stderr.
1123fn print_stats(stats: &DdStats, elapsed: std::time::Duration, status: StatusLevel) {
1124    eprintln!(
1125        "{}+{} records in",
1126        stats.records_in_full, stats.records_in_partial
1127    );
1128    eprintln!(
1129        "{}+{} records out",
1130        stats.records_out_full, stats.records_out_partial
1131    );
1132
1133    if status == StatusLevel::NoXfer {
1134        return;
1135    }
1136
1137    let secs = elapsed.as_secs_f64();
1138    if secs > 0.0 {
1139        let rate = stats.bytes_copied as f64 / secs;
1140        eprintln!(
1141            "{} bytes copied, {:.6} s, {}/s",
1142            stats.bytes_copied,
1143            secs,
1144            human_size(rate as u64)
1145        );
1146    } else {
1147        eprintln!("{} bytes copied", stats.bytes_copied);
1148    }
1149}
1150
/// Format a byte count as a human-readable string (e.g., "1.5 MB").
///
/// Units are decimal (powers of 1000): B, kB, MB, GB, TB, PB, EB. A value that
/// is a whole number in the chosen unit is printed without a fraction
/// ("1 kB"); anything else is printed with one decimal place ("1.5 MB").
///
/// A value that would round up to "1000.0" in one unit is promoted to the next
/// unit instead, so 999_960 bytes is shown as "1.0 MB" rather than "1000.0 kB".
fn human_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "kB", "MB", "GB", "TB", "PB", "EB"];
    // Anything at or above this prints as "1000.0" with one decimal place, so
    // it belongs to the next larger unit.
    const PROMOTE_AT: f64 = 999.95;

    let mut size = bytes as f64;
    for &unit in UNITS {
        if size < PROMOTE_AT {
            if size == size.floor() {
                return format!("{} {}", size as u64, unit);
            }
            return format!("{:.1} {}", size, unit);
        }
        size /= 1000.0;
    }
    // The loop divided once more than there are units; undo that so the value
    // is expressed in EB, the largest unit available.
    format!("{:.1} EB", size * 1000.0)
}
1166
/// Print help message for dd.
///
/// The text is written to stderr in a single `eprint!` call. The `status=`
/// section names `noxfer` as the level that suppresses the final transfer
/// statistics, which is the level `print_stats` checks before omitting the
/// "bytes copied" line.
pub fn print_help() {
    eprint!(
        "\
Usage: dd [OPERAND]...
  or:  dd OPTION
Copy a file, converting and formatting according to the operands.

  bs=BYTES        read and write up to BYTES bytes at a time (default: 512)
  cbs=BYTES       convert BYTES bytes at a time
  conv=CONVS      convert the file as per the comma separated symbol list
  count=N         copy only N input blocks
  ibs=BYTES       read up to BYTES bytes at a time (default: 512)
  if=FILE         read from FILE instead of stdin
  iflag=FLAGS     read as per the comma separated symbol list
  obs=BYTES       write BYTES bytes at a time (default: 512)
  of=FILE         write to FILE instead of stdout
  oflag=FLAGS     write as per the comma separated symbol list
  seek=N          skip N obs-sized blocks at start of output
  skip=N          skip N ibs-sized blocks at start of input
  status=LEVEL    LEVEL of information to print to stderr;
                  'none' suppresses everything but error messages,
                  'noxfer' suppresses the final transfer statistics,
                  'progress' shows periodic transfer statistics

  BLOCKS and BYTES may be followed by the following multiplicative suffixes:
  c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024,
  GB=1000*1000*1000, GiB=1024*1024*1024, and so on for T, P, E.

Each CONV symbol may be:

  lcase     change upper case to lower case
  ucase     change lower case to upper case
  swab      swap every pair of input bytes
  sync      pad every input block with NULs to ibs-size
  noerror   continue after read errors
  notrunc   do not truncate the output file
  fdatasync physically write output file data before finishing
  fsync     likewise, but also write metadata
  excl      fail if the output file already exists
  nocreat   do not create the output file

Each FLAG symbol may be:

  append    append mode (makes sense only for output; conv=notrunc suggested)
  direct    use direct I/O for data
  directory fail unless a directory
  dsync     use synchronized I/O for data
  sync      likewise, but also for metadata
  fullblock accumulate full blocks of input (iflag only)
  nonblock  use non-blocking I/O
  noatime   do not update access time
  nocache   Request to drop cache
  noctty    do not assign controlling terminal from file
  nofollow  do not follow symlinks
  count_bytes  treat 'count=N' as a byte count (iflag only)
  skip_bytes   treat 'skip=N' as a byte count (iflag only)

  --help     display this help and exit
  --version  output version information and exit
"
    );
}
1230
1231/// Print version information for dd.
1232pub fn print_version() {
1233    eprintln!("dd (fcoreutils) {}", env!("CARGO_PKG_VERSION"));
1234}