macos_unifiedlogs/
dsc.rs

1// Copyright 2022 Mandiant, Inc. All Rights Reserved
2// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
3// http://www.apache.org/licenses/LICENSE-2.0
4// Unless required by applicable law or agreed to in writing, software distributed under the License
5// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
6// See the License for the specific language governing permissions and limitations under the License.
7
8use crate::util::extract_string;
9use log::error;
10use nom::Needed;
11use nom::bytes::complete::take;
12use nom::number::complete::{be_u128, le_u16, le_u32, le_u64};
13use serde::{Deserialize, Serialize};
14use std::mem::size_of;
15
16#[derive(Debug, Serialize, Deserialize, Default)]
17pub struct SharedCacheStrings {
18    pub signature: u32,
19    pub major_version: u16, // Version 1 up to Big Sur. Monterey has Version 2!
20    pub minor_version: u16,
21    pub number_ranges: u32,
22    pub number_uuids: u32,
23    pub ranges: Vec<RangeDescriptor>,
24    pub uuids: Vec<UUIDDescriptor>,
25    pub dsc_uuid: String,
26}
27
28#[derive(Debug, Serialize, Deserialize, Default)]
29pub struct RangeDescriptor {
30    pub range_offset: u64, // In Major version 2 this is 8 bytes, in version 1 its 4 bytes
31    pub data_offset: u32,
32    pub range_size: u32,
33    pub unknown_uuid_index: u64, // Unknown value, added in Major version: 2. Appears to be UUID index. In version 1 the index is 4 bytes and is at the start of the range descriptor
34    pub strings: Vec<u8>,
35}
36
37#[derive(Debug, Serialize, Deserialize, Default)]
38pub struct UUIDDescriptor {
39    pub text_offset: u64, // Size appears to be 8 bytes in Major version: 2. 4 bytes in Major Version 1
40    pub text_size: u32,
41    pub uuid: String,
42    pub path_offset: u32,
43    pub path_string: String, // Not part of format
44}
45
46impl SharedCacheStrings {
47    /// Parse shared strings data (the file(s) in /private/var/db/uuidtext/dsc)
48    pub fn parse_dsc(data: &[u8]) -> nom::IResult<&[u8], SharedCacheStrings> {
49        let (input, sig) = take(size_of::<u32>())(data)?;
50        let (_, signature) = le_u32(sig)?;
51
52        let expected_dsc_signature = 0x64736368;
53        if expected_dsc_signature != signature {
54            error!(
55                "[macos-unifiedlogs] Incorrect DSC file signature. Expected {expected_dsc_signature}. Got: {signature}"
56            );
57            return Err(nom::Err::Incomplete(Needed::Unknown));
58        }
59
60        let mut shared_cache_strings = SharedCacheStrings {
61            signature,
62            ..Default::default()
63        };
64
65        let (input, major) = take(size_of::<u16>())(input)?;
66        let (input, minor) = take(size_of::<u16>())(input)?;
67        let (input, number_ranges) = take(size_of::<u32>())(input)?;
68        let (mut input, number_uuids) = take(size_of::<u32>())(input)?;
69
70        let (_, dsc_major) = le_u16(major)?;
71        let (_, dsc_minor) = le_u16(minor)?;
72        let (_, dsc_number_ranges) = le_u32(number_ranges)?;
73        let (_, dsc_number_uuids) = le_u32(number_uuids)?;
74
75        shared_cache_strings.minor_version = dsc_minor;
76        shared_cache_strings.major_version = dsc_major;
77        shared_cache_strings.number_ranges = dsc_number_ranges;
78        shared_cache_strings.number_uuids = dsc_number_uuids;
79
80        let mut range_count = 0;
81        while range_count < shared_cache_strings.number_ranges {
82            let (range_input, range_data) = SharedCacheStrings::get_ranges(input, &dsc_major)?;
83            input = range_input;
84            shared_cache_strings.ranges.push(range_data);
85            range_count += 1;
86        }
87
88        let mut uuid_count = 0;
89        while uuid_count < shared_cache_strings.number_uuids {
90            let (uuid_input, uuid_data) = SharedCacheStrings::get_uuids(input, &dsc_major)?;
91            input = uuid_input;
92            shared_cache_strings.uuids.push(uuid_data);
93            uuid_count += 1;
94        }
95
96        for uuids in &mut shared_cache_strings.uuids {
97            let (_, path_string) = SharedCacheStrings::get_paths(data, uuids.path_offset)?;
98            uuids.path_string = path_string;
99        }
100
101        for range in &mut shared_cache_strings.ranges {
102            let (_, strings) =
103                SharedCacheStrings::get_strings(data, range.data_offset, range.range_size)?;
104            range.strings = strings;
105        }
106
107        Ok((input, shared_cache_strings))
108    }
109
110    // Get range data, used by log entries to determine where the base string entry is located.
111    fn get_ranges<'a>(data: &'a [u8], version: &u16) -> nom::IResult<&'a [u8], RangeDescriptor> {
112        let version_number: u16 = 2;
113        let mut input = data;
114        let mut range_data = RangeDescriptor::default();
115
116        // Version 2 (Monterey and higher) changed the Range format a bit
117        // range offset is now 8 bytes (vs 4 bytes) and starts at beginning
118        // The uuid index was moved to end
119        range_data.range_offset = if version == &version_number {
120            let (data_input, value_range_offset) = take(size_of::<u64>())(input)?;
121            input = data_input;
122            let (_, dsc_range_offset) = le_u64(value_range_offset)?;
123            dsc_range_offset
124        } else {
125            // Get data based on version 1
126            let (data_input, uuid_descriptor_index) = take(size_of::<u32>())(input)?;
127            let (_, dsc_uuid_descriptor_index) = le_u32(uuid_descriptor_index)?;
128            range_data.unknown_uuid_index = u64::from(dsc_uuid_descriptor_index);
129
130            let (data_input, value_range_offset) = take(size_of::<u32>())(data_input)?;
131            input = data_input;
132            let (_, dsc_range_offset) = le_u32(value_range_offset)?;
133            u64::from(dsc_range_offset)
134        };
135
136        let (input, data_offset) = take(size_of::<u32>())(input)?;
137        let (mut input, range_size) = take(size_of::<u32>())(input)?;
138
139        let (_, dsc_data_offset) = le_u32(data_offset)?;
140        let (_, dsc_range_size) = le_u32(range_size)?;
141
142        range_data.data_offset = dsc_data_offset;
143        range_data.range_size = dsc_range_size;
144
145        // UUID index is now located at the end of the format (instead of beginning)
146        if version == &version_number {
147            let (version_two_input, unknown) = take(size_of::<u64>())(input)?;
148            let (_, dsc_unknown) = le_u64(unknown)?;
149            range_data.unknown_uuid_index = dsc_unknown;
150            input = version_two_input;
151        }
152        Ok((input, range_data))
153    }
154
155    // Get UUID entries related to ranges
156    fn get_uuids<'a>(data: &'a [u8], version: &u16) -> nom::IResult<&'a [u8], UUIDDescriptor> {
157        let mut uuid_data = UUIDDescriptor::default();
158
159        let version_number: u16 = 2;
160        let mut input = data;
161        if version == &version_number {
162            let (version_two_input, text_offset) = take(size_of::<u64>())(input)?;
163            let (_, dsc_text_offset) = le_u64(text_offset)?;
164            uuid_data.text_offset = dsc_text_offset;
165            input = version_two_input;
166        } else {
167            let (version_one_input, text_offset) = take(size_of::<u32>())(input)?;
168            let (_, dsc_text_offset) = le_u32(text_offset)?;
169            uuid_data.text_offset = u64::from(dsc_text_offset);
170            input = version_one_input;
171        }
172
173        let (input, text_size) = take(size_of::<u32>())(input)?;
174        let (input, uuid) = take(size_of::<u128>())(input)?;
175        let (input, path_offset) = take(size_of::<u32>())(input)?;
176
177        let (_, dsc_text_size) = le_u32(text_size)?;
178        let (_, dsc_uuid) = be_u128(uuid)?;
179        let (_, dsc_path_offset) = le_u32(path_offset)?;
180
181        uuid_data.text_size = dsc_text_size;
182        uuid_data.uuid = format!("{dsc_uuid:X}");
183        uuid_data.path_offset = dsc_path_offset;
184
185        Ok((input, uuid_data))
186    }
187
188    fn get_paths(data: &[u8], path_offset: u32) -> nom::IResult<&[u8], String> {
189        let (nom_path_offset, _) = take(path_offset)(data)?;
190        let (_, path) = extract_string(nom_path_offset)?;
191        Ok((nom_path_offset, path))
192    }
193
194    // After parsing the ranges and UUIDs remaining data are the base log entry strings
195    fn get_strings(
196        data: &[u8],
197        string_offset: u32,
198        string_range: u32,
199    ) -> nom::IResult<&[u8], Vec<u8>> {
200        let (nom_string_offset, _) = take(string_offset)(data)?;
201        let (_, strings) = take(string_range)(nom_string_offset)?;
202        Ok((&[], strings.to_vec()))
203    }
204}
205
#[cfg(test)]
mod tests {
    use crate::dsc::SharedCacheStrings;
    use std::fs;
    use std::path::PathBuf;

    /// Read a test fixture given its path relative to the crate manifest directory.
    fn read_fixture(relative_path: &str) -> Vec<u8> {
        let mut fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        fixture.push(relative_path);
        fs::read(fixture).unwrap()
    }

    /// A version 1 (Big Sur) DSC file parses into the expected header, UUIDs and ranges.
    #[test]
    fn test_parse_dsc_version_one() {
        let buffer = read_fixture(
            "tests/test_data/DSC Tests/big_sur_version_1_522F6217CB113F8FB845C2A1B784C7C2",
        );

        let (_, results) = SharedCacheStrings::parse_dsc(&buffer).unwrap();

        assert_eq!(results.uuids.len(), 532);
        let first_uuid = &results.uuids[0];
        assert_eq!(first_uuid.uuid, "4DF6D8F5D9C23A968DE45E99D6B73DC8");
        assert_eq!(first_uuid.path_offset, 19919502);
        assert_eq!(first_uuid.text_size, 8192);
        assert_eq!(first_uuid.text_offset, 73728);
        assert_eq!(
            first_uuid.path_string,
            "/usr/lib/system/libsystem_blocks.dylib"
        );

        assert_eq!(results.ranges.len(), 788);
        let first_range = &results.ranges[0];
        assert_eq!(first_range.strings, [0]);
        assert_eq!(first_range.unknown_uuid_index, 0);
        assert_eq!(first_range.range_offset, 80296);
        assert_eq!(first_range.range_size, 1);

        assert_eq!(results.signature, 1685283688); // hcsd
        assert_eq!(results.major_version, 1);
        assert_eq!(results.minor_version, 0);
        assert_eq!(results.dsc_uuid, "");
        assert_eq!(results.number_ranges, 788);
        assert_eq!(results.number_uuids, 532);
    }

    /// A version 2 (Monterey) DSC file parses into the expected header, UUIDs and ranges.
    #[test]
    fn test_parse_dsc_version_two() {
        let buffer = read_fixture(
            "tests/test_data/DSC Tests/monterey_version_2_3D05845F3F65358F9EBF2236E772AC01",
        );

        let (_, results) = SharedCacheStrings::parse_dsc(&buffer).unwrap();

        assert_eq!(results.uuids.len(), 2250);
        let first_uuid = &results.uuids[0];
        assert_eq!(first_uuid.uuid, "326DD91B4EF83D80B90BF50EB7D7FDB8");
        assert_eq!(first_uuid.path_offset, 98376932);
        assert_eq!(first_uuid.text_size, 8192);
        assert_eq!(first_uuid.text_offset, 327680);
        assert_eq!(
            first_uuid.path_string,
            "/usr/lib/system/libsystem_blocks.dylib"
        );

        assert_eq!(results.ranges.len(), 3432);
        let first_range = &results.ranges[0];
        assert_eq!(first_range.strings, [0]);
        assert_eq!(first_range.unknown_uuid_index, 0);
        assert_eq!(first_range.range_offset, 334248);
        assert_eq!(first_range.range_size, 1);

        assert_eq!(results.signature, 1685283688); // hcsd
        assert_eq!(results.major_version, 2);
        assert_eq!(results.minor_version, 0);
        assert_eq!(results.dsc_uuid, "");
        assert_eq!(results.number_ranges, 3432);
        assert_eq!(results.number_uuids, 2250);
    }

    /// A file with a wrong signature is rejected with `Incomplete(Unknown)`.
    #[test]
    #[should_panic(expected = "Incomplete(Unknown)")]
    fn test_bad_header() {
        let buffer = read_fixture(
            "tests/test_data/Bad Data/DSC/bad_header_version_1_522F6217CB113F8FB845C2A1B784C7C2",
        );
        let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();
    }

    /// A file with a valid header but bad content fails with an `Eof` error.
    #[test]
    #[should_panic(expected = "Eof")]
    fn test_bad_content() {
        let buffer = read_fixture(
            "tests/test_data/Bad Data/DSC/bad_content_version_1_522F6217CB113F8FB845C2A1B784C7C2",
        );
        let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();
    }

    /// A file that is not a DSC file at all is rejected with `Incomplete(Unknown)`.
    #[test]
    #[should_panic(expected = "Incomplete(Unknown)")]
    fn test_bad_file() {
        let buffer = read_fixture("tests/test_data/Bad Data/DSC/Badfile");
        let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();
    }
}
311}