// Source path: coreutils_rs/od/core.rs

1use std::io::{self, Read, Write};
2
/// Radix used to print the offset column at the start of each output line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AddressRadix {
    /// Offsets are printed in base 8.
    Octal,
    /// Offsets are printed in base 10.
    Decimal,
    /// Offsets are printed in base 16.
    Hex,
    /// The offset column is left out entirely.
    None,
}
11
/// How one element of input is rendered.
///
/// Variants carrying a `usize` store the element size in bytes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    /// `a`: one byte shown by its ASCII name (nul, soh, stx, ...).
    NamedChar,
    /// `c`: one byte shown as a printable character or a backslash escape
    /// (\0, \a, \b, \t, \n, ...).
    PrintableChar,
    /// `d`: signed decimal integer (d1, d2, d4, d8).
    SignedDec(usize),
    /// `f`: floating point number (f4, f8).
    Float(usize),
    /// `o`: octal integer (o1, o2, o4, o8).
    Octal(usize),
    /// `u`: unsigned decimal integer (u1, u2, u4, u8).
    UnsignedDec(usize),
    /// `x`: hexadecimal integer (x1, x2, x4, x8).
    Hex(usize),
}
30
31/// Configuration for the od command.
32#[derive(Debug, Clone)]
33pub struct OdConfig {
34    pub address_radix: AddressRadix,
35    pub formats: Vec<OutputFormat>,
36    pub skip_bytes: u64,
37    pub read_bytes: Option<u64>,
38    pub width: usize,
39    pub show_duplicates: bool,
40}
41
42impl Default for OdConfig {
43    fn default() -> Self {
44        Self {
45            address_radix: AddressRadix::Octal,
46            formats: vec![OutputFormat::Octal(2)],
47            skip_bytes: 0,
48            read_bytes: None,
49            width: 16,
50            show_duplicates: false,
51        }
52    }
53}
54
/// Display names used by the `a` format, indexed by byte value 0..=127.
///
/// Control characters and space use their ASCII abbreviations (two-letter
/// names carry a leading space so that they are three characters wide),
/// printable characters stand for themselves, and 127 is `del`.
#[rustfmt::skip]
const NAMED_CHARS: [&str; 128] = [
    // 0x00
    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
    // 0x08
    " bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
    // 0x10
    "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
    // 0x18
    "can", " em", "sub", "esc", " fs", " gs", " rs", " us",
    // 0x20
    " sp", "!", "\"", "#", "$", "%", "&", "'",
    // 0x28
    "(", ")", "*", "+", ",", "-", ".", "/",
    // 0x30
    "0", "1", "2", "3", "4", "5", "6", "7",
    // 0x38
    "8", "9", ":", ";", "<", "=", ">", "?",
    // 0x40
    "@", "A", "B", "C", "D", "E", "F", "G",
    // 0x48
    "H", "I", "J", "K", "L", "M", "N", "O",
    // 0x50
    "P", "Q", "R", "S", "T", "U", "V", "W",
    // 0x58
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    // 0x60
    "`", "a", "b", "c", "d", "e", "f", "g",
    // 0x68
    "h", "i", "j", "k", "l", "m", "n", "o",
    // 0x70
    "p", "q", "r", "s", "t", "u", "v", "w",
    // 0x78
    "x", "y", "z", "{", "|", "}", "~", "del",
];
67
68/// Format an address according to the radix.
69fn format_address(offset: u64, radix: AddressRadix) -> String {
70    match radix {
71        AddressRadix::Octal => format!("{:07o}", offset),
72        AddressRadix::Decimal => format!("{:07}", offset),
73        AddressRadix::Hex => format!("{:06x}", offset),
74        AddressRadix::None => String::new(),
75    }
76}
77
78/// Return the field width for a single value of the given format.
79/// This matches GNU od's column widths.
80fn field_width(fmt: OutputFormat) -> usize {
81    match fmt {
82        OutputFormat::NamedChar => 4, // 3 chars + leading space => " nul" = 4 wide
83        OutputFormat::PrintableChar => 4, // 3 chars + leading space => "  \\n" = 4 wide
84        OutputFormat::Octal(1) => 4,  // " 377"
85        OutputFormat::Octal(2) => 7,  // " 177777"
86        OutputFormat::Octal(4) => 12, // " 37777777777"
87        OutputFormat::Octal(8) => 23, // " 1777777777777777777777"
88        OutputFormat::Hex(1) => 3,    // " ff"
89        OutputFormat::Hex(2) => 5,    // " ffff"
90        OutputFormat::Hex(4) => 9,    // " ffffffff"
91        OutputFormat::Hex(8) => 17,   // " ffffffffffffffff"
92        OutputFormat::UnsignedDec(1) => 4, // " 255"
93        OutputFormat::UnsignedDec(2) => 6, // " 65535"
94        OutputFormat::UnsignedDec(4) => 11, // " 4294967295"
95        OutputFormat::UnsignedDec(8) => 21, // " 18446744073709551615"
96        OutputFormat::SignedDec(1) => 5, // " -128"
97        OutputFormat::SignedDec(2) => 7, // " -32768"
98        OutputFormat::SignedDec(4) => 12, // " -2147483648"
99        OutputFormat::SignedDec(8) => 21, // " -9223372036854775808"
100        OutputFormat::Float(4) => 15, // " -x.xxxxxxxe+xx"
101        OutputFormat::Float(8) => 25, // " -x.xxxxxxxxxxxxxxe+xxx"
102        _ => 4,
103    }
104}
105
106/// Get the byte size of a format element.
107fn element_size(fmt: OutputFormat) -> usize {
108    match fmt {
109        OutputFormat::NamedChar | OutputFormat::PrintableChar => 1,
110        OutputFormat::SignedDec(s)
111        | OutputFormat::Float(s)
112        | OutputFormat::Octal(s)
113        | OutputFormat::UnsignedDec(s)
114        | OutputFormat::Hex(s) => s,
115    }
116}
117
118/// Format a single value for the given format.
119fn format_value(bytes: &[u8], fmt: OutputFormat, width: usize) -> String {
120    match fmt {
121        OutputFormat::NamedChar => {
122            let b = bytes[0];
123            if b < 128 {
124                format!("{:>w$}", NAMED_CHARS[b as usize], w = width)
125            } else {
126                format!("{:>w$}", format!("{:03o}", b), w = width)
127            }
128        }
129        OutputFormat::PrintableChar => {
130            let b = bytes[0];
131            let s = match b {
132                0x00 => "\\0".to_string(),
133                0x07 => "\\a".to_string(),
134                0x08 => "\\b".to_string(),
135                0x09 => "\\t".to_string(),
136                0x0a => "\\n".to_string(),
137                0x0b => "\\v".to_string(),
138                0x0c => "\\f".to_string(),
139                0x0d => "\\r".to_string(),
140                0x20..=0x7e => format!("{}", b as char),
141                _ => format!("{:03o}", b),
142            };
143            format!("{:>w$}", s, w = width)
144        }
145        OutputFormat::Octal(size) => match size {
146            1 => format!("{:>w$}", format!("{:03o}", bytes[0]), w = width),
147            2 => {
148                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
149                format!("{:>w$}", format!("{:06o}", v), w = width)
150            }
151            4 => {
152                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
153                format!("{:>w$}", format!("{:011o}", v), w = width)
154            }
155            8 => {
156                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
157                format!("{:>w$}", format!("{:022o}", v), w = width)
158            }
159            _ => String::new(),
160        },
161        OutputFormat::Hex(size) => match size {
162            1 => format!("{:>w$}", format!("{:02x}", bytes[0]), w = width),
163            2 => {
164                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
165                format!("{:>w$}", format!("{:04x}", v), w = width)
166            }
167            4 => {
168                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
169                format!("{:>w$}", format!("{:08x}", v), w = width)
170            }
171            8 => {
172                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
173                format!("{:>w$}", format!("{:016x}", v), w = width)
174            }
175            _ => String::new(),
176        },
177        OutputFormat::UnsignedDec(size) => match size {
178            1 => format!("{:>w$}", bytes[0], w = width),
179            2 => {
180                let v = u16::from_le_bytes(bytes[..2].try_into().unwrap());
181                format!("{:>w$}", v, w = width)
182            }
183            4 => {
184                let v = u32::from_le_bytes(bytes[..4].try_into().unwrap());
185                format!("{:>w$}", v, w = width)
186            }
187            8 => {
188                let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
189                format!("{:>w$}", v, w = width)
190            }
191            _ => String::new(),
192        },
193        OutputFormat::SignedDec(size) => match size {
194            1 => format!("{:>w$}", bytes[0] as i8, w = width),
195            2 => {
196                let v = i16::from_le_bytes(bytes[..2].try_into().unwrap());
197                format!("{:>w$}", v, w = width)
198            }
199            4 => {
200                let v = i32::from_le_bytes(bytes[..4].try_into().unwrap());
201                format!("{:>w$}", v, w = width)
202            }
203            8 => {
204                let v = i64::from_le_bytes(bytes[..8].try_into().unwrap());
205                format!("{:>w$}", v, w = width)
206            }
207            _ => String::new(),
208        },
209        OutputFormat::Float(size) => match size {
210            4 => {
211                let v = f32::from_le_bytes(bytes[..4].try_into().unwrap());
212                format!("{:>w$}", format_float_f32(v), w = width)
213            }
214            8 => {
215                let v = f64::from_le_bytes(bytes[..8].try_into().unwrap());
216                format!("{:>w$}", format_float_f64(v), w = width)
217            }
218            _ => String::new(),
219        },
220    }
221}
222
223/// Format a float using C's %g format.
224/// Uses libc snprintf on Unix and Rust formatting on Windows.
225fn snprintf_g(v: f64, precision: usize) -> String {
226    let precision = precision.min(50);
227    #[cfg(unix)]
228    {
229        let mut buf = [0u8; 64];
230        let fmt = std::ffi::CString::new(format!("%.{}g", precision)).unwrap();
231        let len = unsafe {
232            libc::snprintf(
233                buf.as_mut_ptr() as *mut libc::c_char,
234                buf.len(),
235                fmt.as_ptr(),
236                v,
237            )
238        };
239        if len > 0 && (len as usize) < buf.len() {
240            return String::from_utf8_lossy(&buf[..len as usize]).into_owned();
241        }
242    }
243    // Fallback / Windows: use Rust formatting with %g-like behavior
244    let s = format!("{:.prec$e}", v, prec = precision.saturating_sub(1));
245    // Convert scientific notation to shortest form like %g
246    if let Some(e_pos) = s.find('e') {
247        let exp: i32 = s[e_pos + 1..].parse().unwrap_or(0);
248        if exp >= -(precision as i32) && exp < precision as i32 {
249            // Use fixed notation
250            let fixed = format!(
251                "{:.prec$}",
252                v,
253                prec = (precision as i32 - 1 - exp).max(0) as usize
254            );
255            // Trim trailing zeros after decimal point
256            if fixed.contains('.') {
257                let trimmed = fixed.trim_end_matches('0').trim_end_matches('.');
258                return trimmed.to_string();
259            }
260            return fixed;
261        }
262    }
263    format!("{:.*e}", precision.saturating_sub(1), v)
264}
265
266/// Format f32 like GNU od: uses %.7g formatting.
267fn format_float_f32(v: f32) -> String {
268    snprintf_g(v as f64, 7)
269}
270
271/// Format f64 like GNU od: uses %.17g formatting.
272fn format_float_f64(v: f64) -> String {
273    snprintf_g(v, 17)
274}
275
276/// Format one line of output for a given format type.
277fn format_line(
278    chunk: &[u8],
279    fmt: OutputFormat,
280    line_width: usize,
281    is_first_format: bool,
282    radix: AddressRadix,
283    offset: u64,
284) -> String {
285    let mut line = String::new();
286
287    // Address prefix
288    if is_first_format {
289        line.push_str(&format_address(offset, radix));
290    } else if radix != AddressRadix::None {
291        // Continuation lines: pad with spaces to match address width
292        let addr_width = match radix {
293            AddressRadix::Octal => 7,
294            AddressRadix::Decimal => 7,
295            AddressRadix::Hex => 6,
296            AddressRadix::None => 0,
297        };
298        for _ in 0..addr_width {
299            line.push(' ');
300        }
301    }
302
303    let elem_sz = element_size(fmt);
304    let fw = field_width(fmt);
305
306    // Number of full elements in this chunk
307    let num_elems = line_width / elem_sz;
308
309    // How many elements we can actually format from this (possibly short) chunk
310    let actual_full = chunk.len() / elem_sz;
311    let remainder = chunk.len() % elem_sz;
312
313    for i in 0..num_elems {
314        if i < actual_full {
315            let start = i * elem_sz;
316            let end = start + elem_sz;
317            line.push_str(&format_value(&chunk[start..end], fmt, fw));
318        } else if i == actual_full && remainder > 0 {
319            // Partial element at the end: pad with zeros
320            let start = i * elem_sz;
321            let mut padded = vec![0u8; elem_sz];
322            padded[..remainder].copy_from_slice(&chunk[start..]);
323            line.push_str(&format_value(&padded, fmt, fw));
324        }
325    }
326
327    line
328}
329
330/// Parse a format type string (the TYPE argument of -t).
331pub fn parse_format_type(s: &str) -> Result<OutputFormat, String> {
332    if s.is_empty() {
333        return Err("empty format string".to_string());
334    }
335
336    let mut chars = s.chars();
337    let type_char = chars.next().unwrap();
338    let size_str: String = chars.collect();
339
340    match type_char {
341        'a' => Ok(OutputFormat::NamedChar),
342        'c' => Ok(OutputFormat::PrintableChar),
343        'd' => {
344            let size = if size_str.is_empty() {
345                4
346            } else {
347                parse_size_spec(&size_str, "d")?
348            };
349            Ok(OutputFormat::SignedDec(size))
350        }
351        'f' => {
352            let size = if size_str.is_empty() {
353                4
354            } else {
355                parse_float_size(&size_str)?
356            };
357            Ok(OutputFormat::Float(size))
358        }
359        'o' => {
360            let size = if size_str.is_empty() {
361                2
362            } else {
363                parse_size_spec(&size_str, "o")?
364            };
365            Ok(OutputFormat::Octal(size))
366        }
367        'u' => {
368            let size = if size_str.is_empty() {
369                4
370            } else {
371                parse_size_spec(&size_str, "u")?
372            };
373            Ok(OutputFormat::UnsignedDec(size))
374        }
375        'x' => {
376            let size = if size_str.is_empty() {
377                2
378            } else {
379                parse_size_spec(&size_str, "x")?
380            };
381            Ok(OutputFormat::Hex(size))
382        }
383        _ => Err(format!("invalid type string '{}'", s)),
384    }
385}
386
/// Parses the size part of an integer TYPE (`d`, `o`, `u`, `x`).
///
/// Accepts the letters `C`, `S`, `I`, `L` (1, 2, 4 and 8 bytes) or a
/// decimal number equal to 1, 2, 4 or 8. Only ASCII digits are accepted in
/// the numeric form, so a sign such as `+4` is rejected.
///
/// `type_name` is the type letter and is only used in the error message.
///
/// # Errors
///
/// Returns `invalid type string '<type><size>': invalid size` for anything
/// else, including an empty `s`.
fn parse_size_spec(s: &str, type_name: &str) -> Result<usize, String> {
    let invalid = || format!("invalid type string '{}{}': invalid size", type_name, s);

    match s {
        "C" => Ok(1),
        "S" => Ok(2),
        "I" => Ok(4),
        "L" => Ok(8),
        _ => {
            // `str::parse` would also accept a leading '+'; od does not.
            if s.is_empty() || !s.bytes().all(|b| b.is_ascii_digit()) {
                return Err(invalid());
            }
            match s.parse::<usize>() {
                Ok(n) if matches!(n, 1 | 2 | 4 | 8) => Ok(n),
                _ => Err(invalid()),
            }
        }
    }
}
408
/// Parses the size part of a floating point TYPE (`f`).
///
/// Accepts `F` or `4` for single precision and `D` or `8` for double
/// precision. Only ASCII digits are accepted in the numeric form, so a
/// sign such as `+8` is rejected.
///
/// # Errors
///
/// Returns `16-byte float not supported` for `L` or `16`, and
/// `invalid float size '<size>'` for anything else.
fn parse_float_size(s: &str) -> Result<usize, String> {
    let invalid = || format!("invalid float size '{}'", s);

    match s {
        "F" | "4" => Ok(4),
        "D" | "8" => Ok(8),
        "L" | "16" => Err("16-byte float not supported".to_string()),
        _ => {
            // `str::parse` would also accept a leading '+'; od does not.
            if s.is_empty() || !s.bytes().all(|b| b.is_ascii_digit()) {
                return Err(invalid());
            }
            match s.parse::<usize>() {
                Ok(n) if matches!(n, 4 | 8) => Ok(n),
                _ => Err(invalid()),
            }
        }
    }
}
425
426/// Process input and produce od output.
427pub fn od_process<R: Read, W: Write>(
428    mut input: R,
429    output: &mut W,
430    config: &OdConfig,
431) -> io::Result<()> {
432    // Skip bytes
433    if config.skip_bytes > 0 {
434        let mut to_skip = config.skip_bytes;
435        let mut skip_buf = [0u8; 8192];
436        while to_skip > 0 {
437            let chunk_size = std::cmp::min(to_skip, skip_buf.len() as u64) as usize;
438            let n = input.read(&mut skip_buf[..chunk_size])?;
439            if n == 0 {
440                break;
441            }
442            to_skip -= n as u64;
443        }
444    }
445
446    // Read all data (respecting read_bytes limit)
447    let data = match config.read_bytes {
448        Some(limit) => {
449            let mut buf = Vec::new();
450            let mut limited = input.take(limit);
451            limited.read_to_end(&mut buf)?;
452            buf
453        }
454        None => {
455            let mut buf = Vec::new();
456            input.read_to_end(&mut buf)?;
457            buf
458        }
459    };
460
461    let width = config.width;
462    let mut offset = config.skip_bytes;
463    let mut prev_chunk: Option<Vec<u8>> = None;
464    let mut star_printed = false;
465
466    let mut pos = 0;
467    while pos < data.len() {
468        let end = std::cmp::min(pos + width, data.len());
469        let chunk = &data[pos..end];
470
471        // Duplicate suppression
472        if !config.show_duplicates && chunk.len() == width {
473            if let Some(ref prev) = prev_chunk {
474                if prev.as_slice() == chunk {
475                    if !star_printed {
476                        writeln!(output, "*")?;
477                        star_printed = true;
478                    }
479                    pos += width;
480                    offset += width as u64;
481                    continue;
482                }
483            }
484        }
485
486        star_printed = false;
487
488        for (i, fmt) in config.formats.iter().enumerate() {
489            let line = format_line(chunk, *fmt, width, i == 0, config.address_radix, offset);
490            writeln!(output, "{}", line)?;
491        }
492
493        prev_chunk = Some(chunk.to_vec());
494        pos += width;
495        offset += width as u64;
496    }
497
498    // Final address line
499    if config.address_radix != AddressRadix::None {
500        // The final offset is skip_bytes + actual data length
501        let final_offset = config.skip_bytes + data.len() as u64;
502        writeln!(
503            output,
504            "{}",
505            format_address(final_offset, config.address_radix)
506        )?;
507    }
508
509    Ok(())
510}