hhh 1.0.1

The hhh Binary File Processor
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
// hhh
// Copyright (c) 2023 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/sprowell/hhh

//! Process options for the command line interface.

use crate::VERSION;
use clap::Parser as ClapParser;
use std::{collections::BTreeMap, path::PathBuf};
use trivet::{numbers::Radix, parse_from_string};

/// Map a radix name given on the command line to a [`Radix`].
///
/// Only the first character matters, and ASCII case is ignored: `b` selects
/// binary, `h` hexadecimal, `d` decimal, and `o` octal.  Any other input,
/// including the empty string, produces an error message.
fn _radix_parser(value: &str) -> Result<Radix, String> {
    // Lower-casing just the leading character is enough, since nothing after
    // it is ever inspected.
    match value.chars().next().map(|first| first.to_ascii_lowercase()) {
        Some('b') => Ok(Radix::Binary),
        Some('h') => Ok(Radix::Hexadecimal),
        Some('d') => Ok(Radix::Decimal),
        Some('o') => Ok(Radix::Octal),
        _ => {
            Err("Invalid radix; only binary, octal, decimal, and hexadecimal are allowed".to_string())
        }
    }
}

/// Split a `name=value` metadata entry into its two halves.
///
/// The split happens at the first equal sign only, so the value may itself
/// contain further equal signs, and it may be empty.  Nothing is trimmed
/// from either half.  An error message is returned when the input has no
/// equal sign at all, or when nothing precedes it (the name would be empty).
fn metadata_parser(value: &str) -> Result<(String, String), String> {
    match value.split_once('=') {
        // No equal sign anywhere in the input.
        None => Err("The metadata must be of the form name=value.".to_string()),
        // The equal sign is the very first character, so there is no name.
        Some(("", _)) => Err(format!(
            "The metadata name (prior to the equal sign) cannot be empty: {}",
            value
        )),
        Some((name, content)) => Ok((name.to_string(), content.to_string())),
    }
}

/// Parse an unsigned 64-bit number from a command line argument.
///
/// The text is handed to trivet's number parser.  If parsing fails, the
/// error is converted to its display text so clap can report it.
fn u64_parser(value: &str) -> Result<u64, String> {
    parse_from_string(value)
        .parse_u64()
        .map_err(|problem| problem.to_string())
}

/// Parse a signed 64-bit number from a command line argument.
///
/// The text is handed to trivet's number parser.  If parsing fails, the
/// error is converted to its display text so clap can report it.
fn i64_parser(value: &str) -> Result<i64, String> {
    parse_from_string(value)
        .parse_i64()
        .map_err(|problem| problem.to_string())
}

/// Parse a number that must fit in sixteen bits.
///
/// The text is first parsed as a `u64` by [`u64_parser`]; an error from that
/// step is passed through unchanged.  A value above `u16::MAX` is rejected
/// with a fixed message.
fn u16_parser(value: &str) -> Result<u16, String> {
    match u64_parser(value)? {
        // The guard guarantees the narrowing cast cannot lose bits.
        wide if wide <= u64::from(u16::MAX) => Ok(wide as u16),
        _ => Err("The maximum value is 65535".to_string()),
    }
}

/// Generate a hexdump of a binary file, or construct a binary file given a
/// hex dump.  Many configuration options are supported.
///
#[derive(Debug, ClapParser, Clone)]
#[command(
    name = "hhh",
    version = VERSION,
    author = "Stacy Prowell (sprowell@gmail.com)",
    about = "Binary file manipulation tool",
    long_about = r#"
Generate hexdumps and parse binary file descriptions to create a binary file.
The parsing of binary descriptions is fairly premissive and has features to
make creating a custom binary file easier."#
)]
pub struct HhhArgs {
    /// If generating a hex dump, the bias is added to the offset.  If parsing,
    /// the bias is subtracted from the offset.  The bias can be negative, but
    /// an error occurs if the bias makes the offset negative during parsing.
    /// If the bias would make the offset negative during hex dump generation,
    /// then the total offset is set to zero.
    #[clap(long, default_value_t = 0, value_parser = i64_parser)]
    pub bias: i64,

    /// Specify the number of bytes to include in a group during hex dump
    /// generation.  By default this is one
    #[clap(long, short = 'b', default_value_t = 1, value_parser = u16_parser)]
    pub bytes_per_group: u16,

    /// Specify the maximum number of bytes to output during hex dump generation.
    #[clap(long, default_value_t = 0, value_parser = u64_parser)]
    pub count: u64,

    /// Specify a directive.  The directive is interpreted and applied prior to
    /// generating hex dumps or parsing a description file.
    #[clap(short = 'D')]
    pub directives: Vec<String>,

    /// Specify the number of groups per line.  If not given, then 16 is used.
    #[clap(long, short = 'g', default_value_t = 16, value_parser = u16_parser)]
    pub groups_per_line: u16,

    /// Separator between groups on a line.  By default a single space is used.
    #[clap(long, short = 's', default_value_t = String::from(" "))]
    pub group_separator: String,

    /// Print help on directives and exit.
    #[clap(long)]
    pub list_directives: bool,

    /// During hex dump generation, reverse the order of bytes within a group.
    /// During parsing, reverse the order of bytes within an unprefixed byte
    /// group.
    #[clap(long)]
    pub little_endian: bool,

    /// Include a short metadata block at the top of hex dump output.
    #[clap(long, short = 'm')]
    pub meta: bool,

    /// Suppress ASCII preview when generating a hex dump.
    #[clap(long)]
    pub no_ascii: bool,

    /// Suppress the offset at the start of each line when generating a hex dump.
    #[clap(long)]
    pub no_offset: bool,

    /// Specify a hard limit on the offset value during parsing to prevent a huge file.
    /// By default 0x4000_0000 is used.
    #[clap(long, value_parser = u64_parser, default_value_t = 0x4000_0000)]
    pub offset_limit: u64,

    /// Specify the output file.  If not given, standard output is used.
    #[clap(long, short = 'o')]
    pub output: Option<PathBuf>,

    /// Instead of generating a hex dump, parse a binary description and generate a binary
    /// file.
    #[clap(short = 'p', long)]
    pub parse: bool,

    /// Add to or suppress an item in the metadata.  If VALUE is empty, then the NAME
    /// element is suppressed.
    #[clap(long, short = 'M', value_parser = metadata_parser, value_name = "NAME=VALUE")]
    pub set_meta: Vec<(String, String)>,

    /// Specify a beginning offset.  Bias is ignored here.  Hex dump generation starts
    /// at the given zero-based offset into the file stream.  This can cross files if
    /// multiple files are given as input.
    #[clap(long, default_value_t = 0, value_parser = u64_parser)]
    pub start: u64,

    /// Omit lines that consist of only zero bytes.
    #[clap(long)]
    pub skip_zeros: bool,

    /// Ignore all errors and make a best effort attempt to parse a binary description
    /// and generate a file.
    #[clap(long)]
    pub stoic: bool,

    /// Use upper case for hexadecimal.  By default lower case is used.
    #[clap(long, default_value_t = false)]
    pub uppercase: bool,

    /// Default to decimal and use radix prefixes for hex dump generation and for
    /// parsing.
    #[clap(short = 'r', long, default_value_t = false)]
    pub radix_prefixes: bool,

    /// Set the number of digits to pad the offset value.  By default this is 8.
    #[clap(long, default_value_t = 8)]
    pub offset_width: u8,

    /// The most recent offset value.
    #[clap(skip)]
    pub last_offset: u64,

    /// Assigned constants.
    #[clap(skip)]
    pub variables: BTreeMap<String, Vec<u8>>,

    /// The bias stack.  This is used to implement the structure scope.
    #[clap(skip)]
    pub bias_stack: Vec<i64>,

    /// Do not read the configuration file if it exists.
    #[clap(long, default_value_t = false)]
    pub no_configuration_file: bool,

    /// A list of input files, read in order and treated as if concatenated.  If no
    /// files are specified, then standard input is used.
    pub files: Vec<PathBuf>,
}

impl HhhArgs {
    /// Look up a constant by name and return a copy of its bytes.
    ///
    /// Two names are reserved and are never read from the table:
    ///
    /// * `_` yields [`Self::last_offset`] as eight big-endian bytes.
    /// * `__` yields the top of [`Self::bias_stack`] as eight big-endian bytes,
    ///   or the negated [`Self::bias`] when the stack is empty.
    ///
    /// Both are only meaningful *if* the caller has kept those fields up to
    /// date.  Any other name yields `None` when nothing is assigned to it.
    pub fn get_variable(&self, name: &str) -> Option<Vec<u8>> {
        match name {
            // The current offset.
            "_" => Some(self.last_offset.to_be_bytes().to_vec()),
            // The outer reference: the innermost enclosing scope if there is
            // one, otherwise the negated top-level bias.
            "__" => {
                let top_level = -self.bias;
                let outer = self.bias_stack.last().copied().unwrap_or(top_level);
                Some(outer.to_be_bytes().to_vec())
            }
            _ => self.variables.get(name).cloned(),
        }
    }

    /// Bind `name` to a copy of `value`, overwriting any earlier binding.
    ///
    /// Requests to assign the reserved names `_` and `__` are silently ignored.
    pub fn set_variable(&mut self, name: &str, value: &[u8]) {
        if !matches!(name, "_" | "__") {
            self.variables.insert(name.to_owned(), value.to_owned());
        }
    }

    /// Drop the binding for `name`; does nothing if `name` is not bound.
    pub fn unset_variable(&mut self, name: &str) {
        let _ = self.variables.remove(name);
    }
}

impl Default for HhhArgs {
    /// Build the arguments as they would be with no command line options
    /// given.  Fields are listed in the same order as the struct declaration.
    fn default() -> Self {
        Self {
            // Command line options.
            bias: 0,
            bytes_per_group: 1,
            count: 0,
            directives: Vec::new(),
            groups_per_line: 16,
            group_separator: String::from(" "),
            list_directives: false,
            little_endian: false,
            meta: false,
            no_ascii: false,
            no_offset: false,
            offset_limit: 0x4000_0000,
            output: None,
            parse: false,
            set_meta: Vec::new(),
            start: 0,
            skip_zeros: false,
            stoic: false,
            uppercase: false,
            radix_prefixes: false,
            offset_width: 8,
            // Processing state that is never read from the command line.
            last_offset: 0,
            variables: BTreeMap::new(),
            bias_stack: Vec::new(),
            // Remaining options and positional inputs.
            no_configuration_file: false,
            files: Vec::new(),
        }
    }
}

#[cfg(test)]
mod test {

    use super::{i64_parser, u16_parser, u64_parser, HhhArgs};
    use crate::options::{_radix_parser, metadata_parser};
    use trivet::numbers::Radix;

    /// The default arguments carry the default offset limit.
    #[test]
    fn default_test() {
        let args = HhhArgs::default();
        assert_eq!(args.offset_limit, 0x4000_0000);
    }

    /// Radix names are recognized by their first letter, in any case.
    #[test]
    fn radix_test() {
        let accepted = [
            ("binary", Radix::Binary),
            ("octal", Radix::Octal),
            ("decimal", Radix::Decimal),
            ("hexadecimal", Radix::Hexadecimal),
            ("b", Radix::Binary),
            ("o", Radix::Octal),
            ("d", Radix::Decimal),
            ("h", Radix::Hexadecimal),
            ("Bin", Radix::Binary),
            ("Oct", Radix::Octal),
            ("Dec", Radix::Decimal),
            ("Hex", Radix::Hexadecimal),
        ];
        for (text, expected) in accepted {
            assert_eq!(_radix_parser(text).unwrap().value(), expected.value());
        }
        assert!(_radix_parser("Quinary").is_err());
    }

    /// Metadata entries split at the first equal sign and need a name.
    #[test]
    fn metadata_parser_test() {
        let splits = [
            ("dog=fido", "dog", "fido"),
            ("dog = fido", "dog ", " fido"),
            ("dog=fido=good doggy", "dog", "fido=good doggy"),
        ];
        for (entry, name, content) in splits {
            assert_eq!(
                metadata_parser(entry),
                Ok((name.to_string(), content.to_string()))
            );
        }
        for malformed in ["=man with no name", "authoritarianism"] {
            assert!(metadata_parser(malformed).is_err());
        }
    }

    /// Zero parses in every radix; a bare prefix or empty text does not.
    #[test]
    fn parse_numbers_zeros() {
        for zero in ["0", "0x0", "0o0", "0b0"] {
            assert_eq!(u64_parser(zero).unwrap(), 0);
            assert_eq!(i64_parser(zero).unwrap(), 0);
            assert_eq!(u16_parser(zero).unwrap(), 0);
        }
        for incomplete in ["", "0x", "0o", "0b"] {
            assert!(u64_parser(incomplete).is_err());
            assert!(i64_parser(incomplete).is_err());
            assert!(u16_parser(incomplete).is_err());
        }
    }

    /// Extremes of each integer type parse; out-of-range values are rejected.
    #[test]
    fn parse_numbers() {
        assert_eq!(u64_parser("0xffffffff_ffffffff"), Ok(u64::MAX));
        assert_eq!(i64_parser("0x7fffffff_ffffffff"), Ok(i64::MAX));
        assert_eq!(i64_parser("-0x7fffffff_ffffffff"), Ok(-i64::MAX));
        assert_eq!(i64_parser("-1"), Ok(-1));
        assert_eq!(u16_parser("0xffff"), Ok(u16::MAX));
        assert!(i64_parser("0xffffffff_ffffffff").is_err());
        assert!(u16_parser("0x100000000").is_err());
        assert!(u16_parser("0b21").is_err());
    }

    /// Constants can be assigned, read back, and removed.
    #[test]
    fn constant_test() {
        let mut args = HhhArgs::default();
        args.set_variable("fido", b"good doggy");
        args.set_variable("", b"");
        assert_eq!(
            args.get_variable("fido").as_deref(),
            Some(&b"good doggy"[..])
        );
        assert_eq!(args.get_variable("").as_deref(), Some(&b""[..]));
        // Removing a name that was never assigned is harmless.
        args.unset_variable("thomas");
        args.unset_variable("");
        assert_eq!(args.get_variable(""), None);
        assert_eq!(args.get_variable("rover"), None);
    }
}