coreutils_rs/od/
core.rs

1use std::io::{self, Read, Write};
2
/// Numeric base used when printing the offset column at the start of each line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AddressRadix {
    /// Offsets are printed in base 8.
    Octal,
    /// Offsets are printed in base 10.
    Decimal,
    /// Offsets are printed in base 16.
    Hex,
    /// The offset column is omitted.
    None,
}
11
/// How each element of the input is rendered in the data columns.
///
/// Variants carrying a `usize` store the element size in bytes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    /// `a`: one byte shown by its ASCII name (`nul`, `soh`, `stx`, ...).
    NamedChar,
    /// `c`: one byte shown as a printable character or a backslash escape
    /// (`\0`, `\a`, `\b`, `\t`, `\n`, ...).
    PrintableChar,
    /// `d`: signed decimal integer (`d1`, `d2`, `d4`, `d8`).
    SignedDec(usize),
    /// `f`: floating point value (`f4`, `f8`).
    Float(usize),
    /// `o`: octal integer (`o1`, `o2`, `o4`, `o8`).
    Octal(usize),
    /// `u`: unsigned decimal integer (`u1`, `u2`, `u4`, `u8`).
    UnsignedDec(usize),
    /// `x`: hexadecimal integer (`x1`, `x2`, `x4`, `x8`).
    Hex(usize),
}
30
31/// Configuration for the od command.
32#[derive(Debug, Clone)]
33pub struct OdConfig {
34    pub address_radix: AddressRadix,
35    pub formats: Vec<OutputFormat>,
36    /// Per-format flag: if true, append printable ASCII annotation (the 'z' suffix).
37    pub z_flags: Vec<bool>,
38    pub skip_bytes: u64,
39    pub read_bytes: Option<u64>,
40    pub width: usize,
41    pub show_duplicates: bool,
42}
43
44impl Default for OdConfig {
45    fn default() -> Self {
46        Self {
47            address_radix: AddressRadix::Octal,
48            formats: vec![OutputFormat::Octal(2)],
49            z_flags: vec![false],
50            skip_bytes: 0,
51            read_bytes: None,
52            width: 16,
53            show_duplicates: false,
54        }
55    }
56}
57
/// Display names used by the `-t a` format, indexed by 7-bit ASCII code.
///
/// Control characters, space and DEL are spelled out; two-letter names carry a
/// leading space so that every name is three characters long. All other
/// entries are the character itself.
const NAMED_CHARS: [&str; 128] = [
    // 0x00
    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
    // 0x08
    " bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
    // 0x10
    "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
    // 0x18
    "can", " em", "sub", "esc", " fs", " gs", " rs", " us",
    // 0x20
    " sp", "!", "\"", "#", "$", "%", "&", "'",
    // 0x28
    "(", ")", "*", "+", ",", "-", ".", "/",
    // 0x30
    "0", "1", "2", "3", "4", "5", "6", "7",
    // 0x38
    "8", "9", ":", ";", "<", "=", ">", "?",
    // 0x40
    "@", "A", "B", "C", "D", "E", "F", "G",
    // 0x48
    "H", "I", "J", "K", "L", "M", "N", "O",
    // 0x50
    "P", "Q", "R", "S", "T", "U", "V", "W",
    // 0x58
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    // 0x60
    "`", "a", "b", "c", "d", "e", "f", "g",
    // 0x68
    "h", "i", "j", "k", "l", "m", "n", "o",
    // 0x70
    "p", "q", "r", "s", "t", "u", "v", "w",
    // 0x78
    "x", "y", "z", "{", "|", "}", "~", "del",
];
70
71/// Return the field width for a single value of the given format.
72/// This matches GNU od's column widths.
73fn field_width(fmt: OutputFormat) -> usize {
74    match fmt {
75        OutputFormat::NamedChar => 4, // 3 chars + leading space => " nul" = 4 wide
76        OutputFormat::PrintableChar => 4, // 3 chars + leading space => "  \\n" = 4 wide
77        OutputFormat::Octal(1) => 4,  // " 377"
78        OutputFormat::Octal(2) => 7,  // " 177777"
79        OutputFormat::Octal(4) => 12, // " 37777777777"
80        OutputFormat::Octal(8) => 23, // " 1777777777777777777777"
81        OutputFormat::Hex(1) => 3,    // " ff"
82        OutputFormat::Hex(2) => 5,    // " ffff"
83        OutputFormat::Hex(4) => 9,    // " ffffffff"
84        OutputFormat::Hex(8) => 17,   // " ffffffffffffffff"
85        OutputFormat::UnsignedDec(1) => 4, // " 255"
86        OutputFormat::UnsignedDec(2) => 6, // " 65535"
87        OutputFormat::UnsignedDec(4) => 11, // " 4294967295"
88        OutputFormat::UnsignedDec(8) => 21, // " 18446744073709551615"
89        OutputFormat::SignedDec(1) => 5, // " -128"
90        OutputFormat::SignedDec(2) => 7, // " -32768"
91        OutputFormat::SignedDec(4) => 12, // " -2147483648"
92        OutputFormat::SignedDec(8) => 21, // " -9223372036854775808"
93        OutputFormat::Float(4) => 16, // "   x.xxxxxxxe+xx" (3 leading spaces for positive max)
94        OutputFormat::Float(8) => 25, // " -x.xxxxxxxxxxxxxxe+xxx"
95        _ => 4,
96    }
97}
98
99/// Get the byte size of a format element.
100fn element_size(fmt: OutputFormat) -> usize {
101    match fmt {
102        OutputFormat::NamedChar | OutputFormat::PrintableChar => 1,
103        OutputFormat::SignedDec(s)
104        | OutputFormat::Float(s)
105        | OutputFormat::Octal(s)
106        | OutputFormat::UnsignedDec(s)
107        | OutputFormat::Hex(s) => s,
108    }
109}
110
111/// Format a float using C's %g format.
112/// Uses libc snprintf on Unix and Rust formatting on Windows.
113fn snprintf_g(v: f64, precision: usize) -> String {
114    let precision = precision.min(50);
115    #[cfg(unix)]
116    {
117        // Pre-built format strings for common precisions to avoid allocation
118        static FMT_STRINGS: &[&std::ffi::CStr] = &[
119            c"%.0g", c"%.1g", c"%.2g", c"%.3g", c"%.4g", c"%.5g", c"%.6g", c"%.7g", c"%.8g",
120            c"%.9g", c"%.10g", c"%.11g", c"%.12g", c"%.13g", c"%.14g", c"%.15g", c"%.16g",
121            c"%.17g", c"%.18g", c"%.19g", c"%.20g",
122        ];
123        let mut buf = [0u8; 64];
124        let fmt_cstr: std::ffi::CString;
125        let fmt_ptr = if precision < FMT_STRINGS.len() {
126            FMT_STRINGS[precision].as_ptr()
127        } else {
128            fmt_cstr = std::ffi::CString::new(format!("%.{}g", precision)).unwrap();
129            fmt_cstr.as_ptr()
130        };
131        let len =
132            unsafe { libc::snprintf(buf.as_mut_ptr() as *mut libc::c_char, buf.len(), fmt_ptr, v) };
133        if len > 0 && (len as usize) < buf.len() {
134            return String::from_utf8_lossy(&buf[..len as usize]).into_owned();
135        }
136    }
137    // Fallback / Windows: use Rust formatting with %g-like behavior
138    let s = format!("{:.prec$e}", v, prec = precision.saturating_sub(1));
139    // Convert scientific notation to shortest form like %g
140    if let Some(e_pos) = s.find('e') {
141        let exp: i32 = s[e_pos + 1..].parse().unwrap_or(0);
142        if exp >= -(precision as i32) && exp < precision as i32 {
143            // Use fixed notation
144            let fixed = format!(
145                "{:.prec$}",
146                v,
147                prec = (precision as i32 - 1 - exp).max(0) as usize
148            );
149            // Trim trailing zeros after decimal point
150            if fixed.contains('.') {
151                let trimmed = fixed.trim_end_matches('0').trim_end_matches('.');
152                return trimmed.to_string();
153            }
154            return fixed;
155        }
156    }
157    format!("{:.*e}", precision.saturating_sub(1), v)
158}
159
160/// Format f32 like GNU od: uses %.8g formatting (8 significant digits).
161fn format_float_f32(v: f32) -> String {
162    // Use shortest decimal representation that uniquely round-trips (like Ryu / GNU od).
163    // Try increasing precisions from FLT_DIG (6) to FLT_DECIMAL_DIG (9).
164    for prec in 6usize..=9 {
165        let s = snprintf_g(v as f64, prec);
166        if let Ok(reparsed) = s.trim().parse::<f32>() {
167            if reparsed == v {
168                return s;
169            }
170        }
171    }
172    snprintf_g(v as f64, 9)
173}
174
175/// Format f64 like GNU od: uses %.17g formatting.
176fn format_float_f64(v: f64) -> String {
177    snprintf_g(v, 17)
178}
179
180/// Write a formatted value directly to the output, avoiding String allocation.
181#[inline]
182fn write_value(
183    out: &mut impl Write,
184    bytes: &[u8],
185    fmt: OutputFormat,
186    width: usize,
187) -> io::Result<()> {
188    match fmt {
189        OutputFormat::NamedChar => {
190            let b = bytes[0];
191            if b < 128 {
192                write!(out, "{:>w$}", NAMED_CHARS[b as usize], w = width)
193            } else {
194                write!(out, "{:>w$o}", b, w = width)
195            }
196        }
197        OutputFormat::PrintableChar => {
198            let b = bytes[0];
199            let s: &str = match b {
200                0x00 => "\\0",
201                0x07 => "\\a",
202                0x08 => "\\b",
203                0x09 => "\\t",
204                0x0a => "\\n",
205                0x0b => "\\v",
206                0x0c => "\\f",
207                0x0d => "\\r",
208                _ => "",
209            };
210            if !s.is_empty() {
211                write!(out, "{:>w$}", s, w = width)
212            } else if (0x20..=0x7e).contains(&b) {
213                write!(out, "{:>w$}", b as char, w = width)
214            } else {
215                // Octal for non-printable: format as \ooo within width
216                let mut buf = [0u8; 3];
217                buf[0] = b'0' + (b >> 6);
218                buf[1] = b'0' + ((b >> 3) & 7);
219                buf[2] = b'0' + (b & 7);
220                let s = unsafe { std::str::from_utf8_unchecked(&buf) };
221                write!(out, "{:>w$}", s, w = width)
222            }
223        }
224        OutputFormat::Octal(size) => match size {
225            1 => write!(out, " {:03o}", bytes[0]),
226            2 => {
227                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
228                write!(out, " {:06o}", v)
229            }
230            4 => {
231                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
232                write!(out, " {:011o}", v)
233            }
234            8 => {
235                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
236                write!(out, " {:022o}", v)
237            }
238            _ => Ok(()),
239        },
240        OutputFormat::Hex(size) => match size {
241            1 => write!(out, " {:02x}", bytes[0]),
242            2 => {
243                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
244                write!(out, " {:04x}", v)
245            }
246            4 => {
247                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
248                write!(out, " {:08x}", v)
249            }
250            8 => {
251                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
252                write!(out, " {:016x}", v)
253            }
254            _ => Ok(()),
255        },
256        OutputFormat::UnsignedDec(size) => match size {
257            1 => write!(out, "{:>w$}", bytes[0], w = width),
258            2 => {
259                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
260                write!(out, "{:>w$}", v, w = width)
261            }
262            4 => {
263                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
264                write!(out, "{:>w$}", v, w = width)
265            }
266            8 => {
267                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
268                write!(out, "{:>w$}", v, w = width)
269            }
270            _ => Ok(()),
271        },
272        OutputFormat::SignedDec(size) => match size {
273            1 => write!(out, "{:>w$}", bytes[0] as i8, w = width),
274            2 => {
275                let v = i16::from_le_bytes(bytes[..2].try_into().unwrap());
276                write!(out, "{:>w$}", v, w = width)
277            }
278            4 => {
279                let v = i32::from_le_bytes(bytes[..4].try_into().unwrap());
280                write!(out, "{:>w$}", v, w = width)
281            }
282            8 => {
283                let v = i64::from_le_bytes(bytes[..8].try_into().unwrap());
284                write!(out, "{:>w$}", v, w = width)
285            }
286            _ => Ok(()),
287        },
288        OutputFormat::Float(size) => match size {
289            4 => {
290                let v = f32::from_le_bytes(bytes[..4].try_into().unwrap());
291                write!(out, "{:>w$}", format_float_f32(v), w = width)
292            }
293            8 => {
294                let v = f64::from_le_bytes(bytes[..8].try_into().unwrap());
295                write!(out, "{:>w$}", format_float_f64(v), w = width)
296            }
297            _ => Ok(()),
298        },
299    }
300}
301
302/// Write one line of output for a given format type directly to the writer.
303fn write_format_line(
304    out: &mut impl Write,
305    chunk: &[u8],
306    fmt: OutputFormat,
307    line_width: usize,
308    is_first_format: bool,
309    radix: AddressRadix,
310    offset: u64,
311    z_annotate: bool,
312) -> io::Result<()> {
313    // Address prefix
314    if is_first_format {
315        match radix {
316            AddressRadix::Octal => write!(out, "{:07o}", offset)?,
317            AddressRadix::Decimal => write!(out, "{:07}", offset)?,
318            AddressRadix::Hex => write!(out, "{:06x}", offset)?,
319            AddressRadix::None => {}
320        }
321    } else if radix != AddressRadix::None {
322        let addr_width = match radix {
323            AddressRadix::Octal | AddressRadix::Decimal => 7,
324            AddressRadix::Hex => 6,
325            AddressRadix::None => 0,
326        };
327        for _ in 0..addr_width {
328            out.write_all(b" ")?;
329        }
330    }
331
332    let elem_sz = element_size(fmt);
333    let fw = field_width(fmt);
334    let num_elems = line_width / elem_sz;
335    let actual_full = chunk.len() / elem_sz;
336    let remainder = chunk.len() % elem_sz;
337
338    for i in 0..num_elems {
339        if i < actual_full {
340            let start = i * elem_sz;
341            let end = start + elem_sz;
342            write_value(out, &chunk[start..end], fmt, fw)?;
343        } else if i == actual_full && remainder > 0 {
344            let start = i * elem_sz;
345            let mut padded = [0u8; 8]; // max element size is 8
346            padded[..remainder].copy_from_slice(&chunk[start..]);
347            write_value(out, &padded[..elem_sz], fmt, fw)?;
348        }
349    }
350
351    // Append printable ASCII annotation if 'z' suffix was used
352    if z_annotate {
353        // Pad remaining columns to align the annotation
354        let used_cols = actual_full + if remainder > 0 { 1 } else { 0 };
355        for _ in used_cols..num_elems {
356            for _ in 0..fw {
357                out.write_all(b" ")?;
358            }
359        }
360        out.write_all(b"  >")?;
361        for &b in chunk {
362            if b.is_ascii_graphic() || b == b' ' {
363                out.write_all(&[b])?;
364            } else {
365                out.write_all(b".")?;
366            }
367        }
368        out.write_all(b"<")?;
369    }
370
371    writeln!(out)?;
372    Ok(())
373}
374
375/// Parse a format type string (the TYPE argument of -t).
376/// Returns the format and whether the 'z' suffix was present.
377pub fn parse_format_type(s: &str) -> Result<(OutputFormat, bool), String> {
378    if s.is_empty() {
379        return Err("empty format string".to_string());
380    }
381
382    // Strip trailing 'z' suffix (printable ASCII annotation)
383    let (s, z_annotate) = if s.len() > 1 && s.ends_with('z') {
384        (&s[..s.len() - 1], true)
385    } else {
386        (s, false)
387    };
388
389    let mut chars = s.chars();
390    let type_char = chars.next().unwrap();
391    let size_str: String = chars.collect();
392
393    let fmt = match type_char {
394        'a' => Ok(OutputFormat::NamedChar),
395        'c' => Ok(OutputFormat::PrintableChar),
396        'd' => {
397            let size = if size_str.is_empty() {
398                4
399            } else {
400                parse_size_spec(&size_str, "d")?
401            };
402            Ok(OutputFormat::SignedDec(size))
403        }
404        'f' => {
405            let size = if size_str.is_empty() {
406                4
407            } else {
408                parse_float_size(&size_str)?
409            };
410            Ok(OutputFormat::Float(size))
411        }
412        'o' => {
413            let size = if size_str.is_empty() {
414                2
415            } else {
416                parse_size_spec(&size_str, "o")?
417            };
418            Ok(OutputFormat::Octal(size))
419        }
420        'u' => {
421            let size = if size_str.is_empty() {
422                4
423            } else {
424                parse_size_spec(&size_str, "u")?
425            };
426            Ok(OutputFormat::UnsignedDec(size))
427        }
428        'x' => {
429            let size = if size_str.is_empty() {
430                2
431            } else {
432                parse_size_spec(&size_str, "x")?
433            };
434            Ok(OutputFormat::Hex(size))
435        }
436        _ => Err(format!("invalid type string '{}'", s)),
437    }?;
438    Ok((fmt, z_annotate))
439}
440
/// Translate the size part of an integer type specifier into a byte count.
///
/// Accepts the letters `C`, `S`, `I` and `L` (1, 2, 4 and 8 bytes) or a
/// decimal number equal to 1, 2, 4 or 8. `type_name` is only used to build the
/// error message.
fn parse_size_spec(s: &str, type_name: &str) -> Result<usize, String> {
    let bytes = match s {
        "C" => Some(1),
        "S" => Some(2),
        "I" => Some(4),
        "L" => Some(8),
        number => number
            .parse::<usize>()
            .ok()
            .filter(|n| matches!(n, 1 | 2 | 4 | 8)),
    };
    bytes.ok_or_else(|| format!("invalid type string '{}{}': invalid size", type_name, s))
}
462
/// Translate the size part of a floating point type specifier into a byte count.
///
/// `F` or 4 selects 4-byte floats and `D` or 8 selects 8-byte floats. The
/// 16-byte variants (`L`, `16`) are recognised but rejected as unsupported;
/// anything else is an invalid size.
fn parse_float_size(s: &str) -> Result<usize, String> {
    match s {
        "F" => return Ok(4),
        "D" => return Ok(8),
        "L" | "16" => return Err("16-byte float not supported".to_string()),
        _ => {}
    }

    match s.parse::<usize>() {
        Ok(bytes @ (4 | 8)) => Ok(bytes),
        _ => Err(format!("invalid float size '{}'", s)),
    }
}
479
480/// Process input and produce od output.
481pub fn od_process<R: Read, W: Write>(
482    mut input: R,
483    output: &mut W,
484    config: &OdConfig,
485) -> io::Result<()> {
486    // Skip bytes
487    if config.skip_bytes > 0 {
488        let mut to_skip = config.skip_bytes;
489        let mut skip_buf = [0u8; 8192];
490        while to_skip > 0 {
491            let chunk_size = std::cmp::min(to_skip, skip_buf.len() as u64) as usize;
492            let n = input.read(&mut skip_buf[..chunk_size])?;
493            if n == 0 {
494                break;
495            }
496            to_skip -= n as u64;
497        }
498    }
499
500    // Read all data (respecting read_bytes limit)
501    let data = match config.read_bytes {
502        Some(limit) => {
503            let mut buf = Vec::new();
504            let mut limited = input.take(limit);
505            limited.read_to_end(&mut buf)?;
506            buf
507        }
508        None => {
509            let mut buf = Vec::new();
510            input.read_to_end(&mut buf)?;
511            buf
512        }
513    };
514
515    let width = config.width;
516    let mut offset = config.skip_bytes;
517    let mut prev_chunk: Option<Vec<u8>> = None;
518    let mut star_printed = false;
519
520    let mut pos = 0;
521    while pos < data.len() {
522        let end = std::cmp::min(pos + width, data.len());
523        let chunk = &data[pos..end];
524
525        // Duplicate suppression
526        if !config.show_duplicates && chunk.len() == width {
527            if let Some(ref prev) = prev_chunk {
528                if prev.as_slice() == chunk {
529                    if !star_printed {
530                        writeln!(output, "*")?;
531                        star_printed = true;
532                    }
533                    pos += width;
534                    offset += width as u64;
535                    continue;
536                }
537            }
538        }
539
540        star_printed = false;
541
542        for (i, fmt) in config.formats.iter().enumerate() {
543            let z = config.z_flags.get(i).copied().unwrap_or(false);
544            write_format_line(
545                output,
546                chunk,
547                *fmt,
548                width,
549                i == 0,
550                config.address_radix,
551                offset,
552                z,
553            )?;
554        }
555
556        prev_chunk = Some(chunk.to_vec());
557        pos += width;
558        offset += width as u64;
559    }
560
561    // Final address line
562    if config.address_radix != AddressRadix::None {
563        let final_offset = config.skip_bytes + data.len() as u64;
564        match config.address_radix {
565            AddressRadix::Octal => writeln!(output, "{:07o}", final_offset)?,
566            AddressRadix::Decimal => writeln!(output, "{:07}", final_offset)?,
567            AddressRadix::Hex => writeln!(output, "{:06x}", final_offset)?,
568            AddressRadix::None => {}
569        }
570    }
571
572    Ok(())
573}
coreutils_rs/od/core.rs

coreutils_rs/od/
core.rs