1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
use crate::common::is_offset_safe;
use crate::signatures::common::{
SignatureError, SignatureResult, CONFIDENCE_HIGH, CONFIDENCE_MEDIUM,
};
/// Size, in bytes, of one tar block. Each entry header occupies one block, and entry data
/// is rounded up to a whole number of blocks.
const TARBALL_BLOCK_SIZE: usize = 512;
// Offset of the magic bytes from the start of an entry header block
const TARBALL_MAGIC_OFFSET: usize = 257;
// Number of magic bytes compared against TARBALL_UNIVERSAL_MAGIC
const TARBALL_MAGIC_SIZE: usize = 5;
// Offset of the ASCII octal entry size field from the start of an entry header block
const TARBALL_SIZE_OFFSET: usize = 124;
// Number of bytes read from the entry size field
const TARBALL_SIZE_LEN: usize = 11;
// Magic prefix that every processed entry header is required to contain
const TARBALL_UNIVERSAL_MAGIC: &[u8; 5] = b"ustar";
// Number of validated entry headers at which the result is reported with high confidence
const TARBALL_MIN_EXPECTED_HEADERS: usize = 10;
/// Human readable description
pub const DESCRIPTION: &str = "POSIX tar archive";
/// Returns the magic byte sequences used to locate tarballs.
///
/// Two signatures are returned, in this order: the tarball magic (`ustar` followed by a NUL
/// byte) and the GNU tarball magic (`ustar` followed by two spaces and a NUL byte).
pub fn tarball_magic() -> Vec<Vec<u8>> {
    let ustar_magic: &[u8] = b"ustar\x00";
    let gnu_magic: &[u8] = b"ustar\x20\x20\x00";

    [ustar_magic, gnu_magic]
        .iter()
        .map(|magic| magic.to_vec())
        .collect()
}
/// Validate tarball signatures
///
/// `offset` is the position in `file_data` at which the tarball magic bytes were found. The
/// magic is located `TARBALL_MAGIC_OFFSET` bytes into the first entry header, so the tarball
/// itself is taken to start at `offset - TARBALL_MAGIC_OFFSET`.
///
/// Entry headers are walked one after another, starting at the tarball start. Walking stops at
/// the first header that is truncated, has a bad checksum, or does not yield a valid entry size.
///
/// # Errors
///
/// Returns `SignatureError` if `offset` is too small to be preceded by the rest of an entry
/// header, or if no complete entry (at least one block in total) could be validated.
pub fn tarball_parser(file_data: &[u8], offset: usize) -> Result<SignatureResult, SignatureError> {
    // The header magic does not start at the beginning of a tar entry. A match located closer
    // than TARBALL_MAGIC_OFFSET to the start of the data cannot belong to a real header, so
    // reject it instead of letting the subtraction underflow.
    let tarball_start_offset = offset
        .checked_sub(TARBALL_MAGIC_OFFSET)
        .ok_or(SignatureError)?;

    let available_data = file_data.len();

    // Running total size of the tarball
    let mut tarball_total_size: usize = 0;
    // Count of how many tar entry headers passed checksum validation
    let mut valid_header_count: usize = 0;

    let mut next_header_start = tarball_start_offset;
    let mut previous_header_start = None;

    // Loop through available data, processing tarball entry headers
    while is_offset_safe(available_data, next_header_start, previous_header_start) {
        // Get the next header's data; this yields None if not enough data is present, protecting
        // header_checksum_is_valid and tarball_entry_size from short input.
        let header_block = next_header_start
            .checked_add(TARBALL_BLOCK_SIZE)
            .and_then(|next_header_end| file_data.get(next_header_start..next_header_end));

        let tarball_header_block = match header_block {
            Some(block) => block,
            None => break,
        };

        // Bad checksum? Quit processing headers.
        if !header_checksum_is_valid(tarball_header_block) {
            break;
        }

        // A header is counted as soon as its checksum validates, before its magic and size
        // are examined.
        valid_header_count += 1;

        // Get the reported size of this entry (header plus data)
        match tarball_entry_size(tarball_header_block) {
            Err(_) => break,
            Ok(entry_size) => {
                // Update total size count, and next/previous header offsets
                tarball_total_size += entry_size;
                previous_header_start = Some(next_header_start);
                next_header_start += entry_size;
            }
        }
    }

    // We expect that a tarball should be, at a minimum, one block in size
    if tarball_total_size < TARBALL_BLOCK_SIZE {
        return Err(SignatureError);
    }

    // Default confidence is medium; if more than just a few tarball headers were found and
    // processed successfully, we have pretty high confidence that this isn't a false positive.
    let confidence = if valid_header_count >= TARBALL_MIN_EXPECTED_HEADERS {
        CONFIDENCE_HIGH
    } else {
        CONFIDENCE_MEDIUM
    };

    Ok(SignatureResult {
        description: format!("{}, file count: {}", DESCRIPTION, valid_header_count),
        offset: tarball_start_offset,
        size: tarball_total_size,
        confidence,
        ..Default::default()
    })
}
/// Checks the checksum stored in a tarball entry header.
///
/// The stored checksum is read as ASCII octal from bytes 148..156 of `header_block`. It is
/// compared against the sum of every byte in the block, where each byte of the checksum field
/// itself is counted as 0x20 instead of its actual value.
///
/// Panics if `header_block` is too short to contain the checksum field; the caller is expected
/// to pass a full header block.
fn header_checksum_is_valid(header_block: &[u8]) -> bool {
    const CHECKSUM_FIELD_START: usize = 148;
    const CHECKSUM_FIELD_END: usize = 156;

    let checksum_field = CHECKSUM_FIELD_START..CHECKSUM_FIELD_END;

    // Checksum recorded in the header itself
    let expected = tarball_octal(&header_block[checksum_field.clone()]);

    // Checksum computed over the block, substituting 0x20 for the checksum field bytes
    let computed: usize = header_block
        .iter()
        .enumerate()
        .map(|(index, byte)| {
            if checksum_field.contains(&index) {
                0x20
            } else {
                usize::from(*byte)
            }
        })
        .sum();

    computed == expected
}
/// Returns the size of a tarball entry, including header and data
///
/// `tarball_entry_data` must start at the beginning of the entry's header block. The data size
/// is read from the header as ASCII octal, rounded up to a whole number of blocks, and one
/// further block is added for the header itself.
///
/// # Errors
///
/// Returns `SignatureError` if the header is too short to contain the magic and size fields,
/// if the magic bytes are not `TARBALL_UNIVERSAL_MAGIC`, or if the resulting size does not fit
/// in a `usize`.
fn tarball_entry_size(tarball_entry_data: &[u8]) -> Result<usize, SignatureError> {
    // Get the tarball entry's magic bytes
    let entry_magic = tarball_entry_data
        .get(TARBALL_MAGIC_OFFSET..TARBALL_MAGIC_OFFSET + TARBALL_MAGIC_SIZE)
        .ok_or(SignatureError)?;

    // Make sure the magic bytes are valid
    if entry_magic != TARBALL_UNIVERSAL_MAGIC {
        return Err(SignatureError);
    }

    // Pull this tarball entry's data size, stored as ASCII octal, out of the header
    let entry_size_string = tarball_entry_data
        .get(TARBALL_SIZE_OFFSET..TARBALL_SIZE_OFFSET + TARBALL_SIZE_LEN)
        .ok_or(SignatureError)?;
    let reported_entry_size: usize = tarball_octal(entry_size_string);

    // Round the data size up to whole blocks using integer arithmetic. Floating point must not
    // be used here: an f32 cannot represent every size above 2^24 bytes, which would yield a
    // wrong block count for large entries.
    let data_block_count = reported_entry_size / TARBALL_BLOCK_SIZE
        + usize::from(reported_entry_size % TARBALL_BLOCK_SIZE != 0);

    // The actual size of this entry is the data blocks PLUS one block for the entry header
    data_block_count
        .checked_add(1)
        .and_then(|block_count| block_count.checked_mul(TARBALL_BLOCK_SIZE))
        .ok_or(SignatureError)
}
/// Convert an ASCII octal string to a number
///
/// Digits are consumed from the start of `octal_string` until the first byte that is not an
/// ASCII octal digit (`'0'` through `'7'`), such as a NUL or space terminator, or until the end
/// of the input. Returns 0 if the input is empty or does not start with an octal digit.
fn tarball_octal(octal_string: &[u8]) -> usize {
    octal_string
        .iter()
        // '8' and '9' are ASCII digits but not valid octal, so they end the number too
        .take_while(|octal_char| (b'0'..=b'7').contains(*octal_char))
        .fold(0usize, |num, octal_char| {
            num * 8 + usize::from(octal_char - b'0')
        })
}