Skip to main content

linuxutils_text/
hexdump.rs

1use linuxutils_common::man::ManContent;
2
3pub const MAN: ManContent = ManContent::empty();
4
5use clap::Parser;
6use std::{
7    io::{self, Read, Write},
8    process::ExitCode,
9};
10
11#[derive(Parser)]
12#[command(
13    name = "hexdump",
14    about = "Display file contents in hexadecimal, decimal, octal, or ascii"
15)]
16pub struct Args {
17    /// One-byte octal display
18    #[arg(short = 'b', long = "one-byte-octal")]
19    one_byte_octal: bool,
20
21    /// One-byte hex display
22    #[arg(short = 'X', long = "one-byte-hex")]
23    one_byte_hex: bool,
24
25    /// One-byte character display
26    #[arg(short = 'c', long = "one-byte-char")]
27    one_byte_char: bool,
28
29    /// Canonical hex+ASCII display
30    #[arg(short = 'C', long)]
31    canonical: bool,
32
33    /// Two-byte decimal display
34    #[arg(short = 'd', long = "two-bytes-decimal")]
35    two_bytes_decimal: bool,
36
37    /// Two-byte octal display
38    #[arg(short = 'o', long = "two-bytes-octal")]
39    two_bytes_octal: bool,
40
41    /// Two-byte hexadecimal display
42    #[arg(short = 'x', long = "two-bytes-hex")]
43    two_bytes_hex: bool,
44
45    /// Interpret only length bytes of input
46    #[arg(short = 'n', long)]
47    length: Option<u64>,
48
49    /// Skip offset bytes from the beginning
50    #[arg(short, long)]
51    skip: Option<u64>,
52
53    /// Display all data (don't squeeze identical lines)
54    #[arg(short = 'v', long = "no-squeezing")]
55    no_squeezing: bool,
56
57    /// Files to display
58    #[arg()]
59    files: Vec<String>,
60}
61
/// Output layout used for every dumped line.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the selected mode can be
/// logged and compared in assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Mode {
    /// No format flag given: 16-bit hexadecimal words, single-space separated.
    Default,
    /// `-C`: hexadecimal bytes followed by an ASCII column between `|` bars.
    Canonical,
    /// `-b`: one three-digit octal number per byte.
    OneByteOctal,
    /// `-X`: one two-digit hexadecimal number per byte.
    OneByteHex,
    /// `-c`: one character, C escape, or octal number per byte.
    OneByteChar,
    /// `-d`: 16-bit words as five-digit unsigned decimal.
    TwoBytesDecimal,
    /// `-o`: 16-bit words as six-digit octal.
    TwoBytesOctal,
    /// `-x`: 16-bit words as four-digit hexadecimal, wide spacing.
    TwoBytesHex,
}
73
/// Parses a byte count such as `512`, `0x200`, `4K` or `1MiB`.
///
/// Surrounding whitespace is ignored. A `0x`/`0X` prefix selects hexadecimal;
/// no size suffix is recognised in that form. Otherwise the value is a decimal
/// number optionally followed by `K`/`KiB`, `M`/`MiB` or `G`/`GiB`, which
/// multiply it by 1024, 1024² and 1024³ respectively.
///
/// Returns `None` for empty input, for anything that is not a valid number,
/// and for values whose product with the suffix does not fit in a `u64`.
fn parse_suffix_value(s: &str) -> Option<u64> {
    // Tried in order; the first suffix that matches wins.
    const SUFFIXES: [(&str, u64); 6] = [
        ("KiB", 1 << 10),
        ("K", 1 << 10),
        ("MiB", 1 << 20),
        ("M", 1 << 20),
        ("GiB", 1 << 30),
        ("G", 1 << 30),
    ];

    let s = s.trim();
    if s.is_empty() {
        return None;
    }

    if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
        return u64::from_str_radix(hex, 16).ok();
    }

    let (digits, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, mult)| {
            s.strip_suffix(suffix).map(|rest| (rest, mult))
        })
        .unwrap_or((s, 1));

    // `checked_mul` turns an overflowing product into `None` instead of a
    // debug-build panic or a silently wrapped value in release builds.
    digits.trim().parse::<u64>().ok()?.checked_mul(multiplier)
}
101
/// Presents all inputs (the named files, or stdin when none are given) as one
/// continuous byte stream, after applying an initial skip and an optional
/// limit on the total number of bytes delivered.
struct InputReader {
    /// Inputs in command-line order.
    sources: Vec<Box<dyn Read>>,
    /// Index of the source currently being read; earlier ones are exhausted.
    current: usize,
    /// Bytes that may still be delivered, or `None` when there is no limit.
    remaining: Option<u64>,
}

impl InputReader {
    /// Opens every file in `files` (stdin if the list is empty), discards the
    /// first `skip` bytes of the combined stream and limits later reads to
    /// `length` bytes in total.
    ///
    /// # Errors
    ///
    /// Returns the error from opening a file, with the file name prepended to
    /// the message, or any error hit while reading the skipped bytes.
    fn new(
        files: &[String],
        skip: Option<u64>,
        length: Option<u64>,
    ) -> io::Result<Self> {
        let mut sources: Vec<Box<dyn Read>> = if files.is_empty() {
            vec![Box::new(io::stdin())]
        } else {
            files
                .iter()
                .map(|path| {
                    std::fs::File::open(path)
                        .map(|file| Box::new(file) as Box<dyn Read>)
                        // Keep the error kind but say which file failed.
                        .map_err(|e| {
                            io::Error::new(e.kind(), format!("{path}: {e}"))
                        })
                })
                .collect::<io::Result<Vec<_>>>()?
        };

        if let Some(count) = skip {
            Self::skip_bytes(&mut sources, count)?;
        }

        Ok(Self {
            sources,
            current: 0,
            remaining: length,
        })
    }

    /// Discards up to `count` bytes from the front of the combined stream and
    /// removes every source that was found to be exhausted while doing so.
    ///
    /// Stops early, without error, if all sources run out first.
    fn skip_bytes(
        sources: &mut Vec<Box<dyn Read>>,
        count: u64,
    ) -> io::Result<()> {
        let mut to_skip = count;
        let mut exhausted = 0;
        while to_skip > 0 && exhausted < sources.len() {
            // `take` caps the copy at the bytes still to skip, so the
            // subtraction below cannot underflow. `io::copy` retries
            // interrupted reads on its own.
            let mut limited = sources[exhausted].by_ref().take(to_skip);
            to_skip -= io::copy(&mut limited, &mut io::sink())?;
            if to_skip > 0 {
                // The copy stopped short of the cap: this source hit EOF.
                exhausted += 1;
            }
        }
        sources.drain(..exhausted);
        Ok(())
    }

    /// Fills `buf` as far as possible, moving on to the next source whenever
    /// the current one reaches end of file.
    ///
    /// Returns the number of bytes stored. This is smaller than `buf.len()`
    /// only when all input is exhausted or the length limit has been reached;
    /// `0` means there is nothing left to read.
    ///
    /// # Errors
    ///
    /// Returns the first read error other than `ErrorKind::Interrupted`, which
    /// is retried.
    fn read_exact_or_eof(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let max = match self.remaining {
            // A limit too large for `usize` cannot constrain this buffer.
            Some(rem) => usize::try_from(rem)
                .map_or(buf.len(), |rem| rem.min(buf.len())),
            None => buf.len(),
        };

        let mut total = 0;
        while total < max {
            let Some(source) = self.sources.get_mut(self.current) else {
                break;
            };
            match source.read(&mut buf[total..max]) {
                Ok(0) => self.current += 1,
                Ok(n) => total += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }

        if let Some(rem) = self.remaining.as_mut() {
            // `total <= max <= *rem`, so this cannot underflow.
            *rem -= total as u64;
        }
        Ok(total)
    }
}
173
/// Writes one line of canonical output: an eight-digit hexadecimal offset,
/// sixteen two-digit hexadecimal byte columns split into two groups of eight,
/// and the same bytes as text between `|` bars.
///
/// Only the first sixteen bytes of `data` are shown. Missing byte columns are
/// blank-filled so the text column stays aligned, and bytes that are neither
/// a space nor a printable ASCII character appear as `.`.
fn format_canonical(
    data: &[u8],
    offset: u64,
    out: &mut dyn Write,
) -> io::Result<()> {
    write!(out, "{offset:08x}  ")?;

    for column in 0..16 {
        match data.get(column) {
            Some(byte) => write!(out, "{byte:02x} ")?,
            None => write!(out, "   ")?,
        }
        // Extra gap between the two groups of eight columns.
        if column == 7 {
            write!(out, " ")?;
        }
    }

    write!(out, " |")?;
    for &byte in data.iter().take(16) {
        let shown = if byte == b' ' || byte.is_ascii_graphic() {
            byte as char
        } else {
            '.'
        };
        write!(out, "{shown}")?;
    }
    writeln!(out, "|")
}
200
/// Writes one line of one-byte octal output: a seven-digit hexadecimal offset
/// followed by each of the first sixteen bytes of `data` as a space-separated,
/// zero-padded three-digit octal number.
fn format_one_byte_octal(
    data: &[u8],
    offset: u64,
    out: &mut dyn Write,
) -> io::Result<()> {
    write!(out, "{offset:07x}")?;
    data.iter()
        .take(16)
        .try_for_each(|byte| write!(out, " {byte:03o}"))?;
    writeln!(out)
}
212
/// Writes one line of one-byte hexadecimal output: a seven-digit hexadecimal
/// offset followed by each of the first sixteen bytes of `data` as a
/// space-separated, zero-padded two-digit hexadecimal number.
fn format_one_byte_hex(
    data: &[u8],
    offset: u64,
    out: &mut dyn Write,
) -> io::Result<()> {
    let shown = &data[..data.len().min(16)];

    write!(out, "{offset:07x}")?;
    for byte in shown {
        write!(out, " {byte:02x}")?;
    }
    writeln!(out)
}
224
/// Writes one line of one-byte character output: a seven-digit hexadecimal
/// offset followed by one cell per byte for the first sixteen bytes of `data`.
///
/// Every cell is a separating space plus a three-column, right-aligned field
/// holding one of:
/// - a C-style escape (`\0`, `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r`),
/// - the character itself for a space or printable ASCII,
/// - a zero-padded three-digit octal number for anything else.
///
/// All cells are therefore four characters wide, so columns line up no matter
/// which representation a byte gets.
fn format_one_byte_char(
    data: &[u8],
    offset: u64,
    out: &mut dyn Write,
) -> io::Result<()> {
    write!(out, "{offset:07x}")?;
    for &byte in data.iter().take(16) {
        let escape = match byte {
            0 => Some("\\0"),
            7 => Some("\\a"),
            8 => Some("\\b"),
            9 => Some("\\t"),
            10 => Some("\\n"),
            11 => Some("\\v"),
            12 => Some("\\f"),
            13 => Some("\\r"),
            _ => None,
        };
        // Written straight to `out`; no temporary `String` per byte.
        match escape {
            Some(text) => write!(out, " {text:>3}")?,
            None if byte == b' ' || byte.is_ascii_graphic() => {
                write!(out, " {:>3}", byte as char)?
            }
            None => write!(out, " {byte:03o}")?,
        }
    }
    writeln!(out)
}
250
/// Writes one line of two-byte output: the offset, then the first sixteen
/// bytes of `data` as little-endian 16-bit words.
///
/// `value_fmt` selects how each word is rendered:
/// - `"x4"`: four spaces, then four hexadecimal digits,
/// - `"d5"`: three spaces, then five decimal digits,
/// - `"o6"`: two spaces, then six octal digits,
/// - `"x4d"` or any other string: one space, then four hexadecimal digits.
///
/// If the line holds an odd number of bytes, the last byte is rendered as a
/// word whose high byte is zero, using the same format as the other words.
fn format_two_bytes(
    data: &[u8],
    offset: u64,
    out: &mut dyn Write,
    offset_fmt: OffsetFmt,
    value_fmt: &str,
) -> io::Result<()> {
    match offset_fmt {
        OffsetFmt::Hex7 => write!(out, "{offset:07x}")?,
    }

    let shown = &data[..data.len().min(16)];
    for unit in shown.chunks(2) {
        // `chunks` never yields an empty slice; a lone trailing byte gets a
        // zero high byte.
        let high = unit.get(1).copied().unwrap_or(0);
        let val = u16::from_le_bytes([unit[0], high]);
        match value_fmt {
            "x4" => write!(out, "    {val:04x}")?,
            "d5" => write!(out, "   {val:05}")?,
            "o6" => write!(out, "  {val:06o}")?,
            _ => write!(out, " {val:04x}")?,
        }
    }
    writeln!(out)
}

/// How the offset column at the start of a line is rendered.
#[derive(Clone, Copy)]
enum OffsetFmt {
    /// Seven zero-padded hexadecimal digits.
    Hex7,
}
290
291pub fn run(args: Args) -> ExitCode {
292    let mode = if args.canonical {
293        Mode::Canonical
294    } else if args.one_byte_octal {
295        Mode::OneByteOctal
296    } else if args.one_byte_hex {
297        Mode::OneByteHex
298    } else if args.one_byte_char {
299        Mode::OneByteChar
300    } else if args.two_bytes_decimal {
301        Mode::TwoBytesDecimal
302    } else if args.two_bytes_octal {
303        Mode::TwoBytesOctal
304    } else if args.two_bytes_hex {
305        Mode::TwoBytesHex
306    } else {
307        Mode::Default
308    };
309
310    let skip = args
311        .skip
312        .or_else(|| args.skip.and_then(|s| parse_suffix_value(&s.to_string())));
313
314    let mut reader = match InputReader::new(&args.files, skip, args.length) {
315        Ok(r) => r,
316        Err(e) => {
317            eprintln!("hexdump: {e}");
318            return ExitCode::FAILURE;
319        }
320    };
321
322    let stdout = io::stdout();
323    let mut out = stdout.lock();
324    let mut offset = skip.unwrap_or(0);
325    let mut buf = [0u8; 16];
326    let mut prev_line: Option<Vec<u8>> = None;
327    let mut squeezed = false;
328
329    let format_line =
330        |data: &[u8], offset: u64, out: &mut dyn Write| -> io::Result<()> {
331            match mode {
332                Mode::Canonical => format_canonical(data, offset, out),
333                Mode::OneByteOctal => format_one_byte_octal(data, offset, out),
334                Mode::OneByteHex => format_one_byte_hex(data, offset, out),
335                Mode::OneByteChar => format_one_byte_char(data, offset, out),
336                Mode::TwoBytesHex => {
337                    format_two_bytes(data, offset, out, OffsetFmt::Hex7, "x4")
338                }
339                Mode::TwoBytesDecimal => {
340                    format_two_bytes(data, offset, out, OffsetFmt::Hex7, "d5")
341                }
342                Mode::TwoBytesOctal => {
343                    format_two_bytes(data, offset, out, OffsetFmt::Hex7, "o6")
344                }
345                Mode::Default => {
346                    format_two_bytes(data, offset, out, OffsetFmt::Hex7, "x4d")
347                }
348            }
349        };
350
351    loop {
352        let n = match reader.read_exact_or_eof(&mut buf) {
353            Ok(n) => n,
354            Err(e) => {
355                eprintln!("hexdump: {e}");
356                return ExitCode::FAILURE;
357            }
358        };
359
360        if n == 0 {
361            break;
362        }
363
364        let line = &buf[..n];
365
366        // Squeezing: skip identical lines.
367        if !args.no_squeezing
368            && let Some(ref prev) = prev_line
369            && prev == line
370            && n == 16
371        {
372            if !squeezed {
373                let _ = writeln!(out, "*");
374                squeezed = true;
375            }
376            offset += n as u64;
377            continue;
378        }
379        squeezed = false;
380        prev_line = Some(line.to_vec());
381
382        if let Err(e) = format_line(line, offset, &mut out) {
383            eprintln!("hexdump: {e}");
384            return ExitCode::FAILURE;
385        }
386
387        offset += n as u64;
388    }
389
390    // Print final offset.
391    let _ = match mode {
392        Mode::Canonical => writeln!(out, "{offset:08x}"),
393        _ => writeln!(out, "{offset:07x}"),
394    };
395
396    ExitCode::SUCCESS
397}