ld_so_cache/
parsers.rs

1//! Binary parsers for ld.so.cache file formats.
2//!
3//! This module contains the core parsing logic using the nom parser combinator library.
4//! It handles both the legacy ld.so-1.7.0 format and the modern glibc format with
5//! hardware capabilities and extensions.
6//!
7//! The parsing is designed to be resilient to malformed data and will attempt to
8//! extract as much information as possible even from partially corrupted files.
9
10use nom::{
11    bytes::complete::{tag, take},
12    number::complete::{le_u32, le_u64, le_i32, u8},
13    Parser,
14    IResult,
15};
16
17use crate::{
18    LdCache, OldCache, NewCache, OldFileEntry, NewFileEntry,
19    ExtensionDirectory, ExtensionSection, CacheError,
20};
21
/// Magic bytes that `parse_old_cache_header` expects at the very start of a
/// legacy-format cache.
const OLD_CACHE_MAGIC: &[u8] = b"ld.so-1.7.0";
/// Magic bytes that open a new-format header; `parse_new_cache_header` expects
/// them to be followed immediately by `NEW_CACHE_VERSION`.
const NEW_CACHE_MAGIC: &[u8] = b"glibc-ld.so.cache";
/// Version string that must directly follow `NEW_CACHE_MAGIC` (bytes 17..20 of
/// the new-format header).
const NEW_CACHE_VERSION: &[u8] = b"1.1";
/// Value the first little-endian `u32` of an extension directory is compared
/// against in `parse_extension_directory`.
///
/// NOTE(review): glibc's `dl-cache.h` appears to define `cache_extension_magic`
/// as `(uint32_t) -358342284` (i.e. `0xEAA4_2174`), which differs from this
/// value. Confirm against a real cache file before relying on extension parsing.
const EXTENSION_MAGIC: u32 = 0xEA8D_4E78;
26
27/// Parses an ld.so.cache file from raw bytes.
28///
29/// This is the main entry point for parsing cache files. It automatically detects
30/// the format (old, new, or combined) and delegates to the appropriate parser.
31///
32/// # Arguments
33///
34/// * `input` - Raw bytes of the cache file
35///
36/// # Returns
37///
38/// A parsed `LdCache` structure containing all discovered cache data.
39///
40/// # Errors
41///
42/// * `CacheError::TruncatedFile` - Input is too short to contain valid cache data
43/// * `CacheError::InvalidMagic` - File doesn't start with a recognized magic number
44/// * `CacheError::ParseError` - Parsing failed due to malformed data
45///
46/// # Examples
47///
48/// ```rust
49/// use ld_so_cache::parsers::parse_ld_cache;
50/// use std::fs;
51///
52/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
53/// let data = fs::read("/etc/ld.so.cache")?;
54/// let cache = parse_ld_cache(&data)?;
55///
56/// println!("Found {} library entries", cache.get_entries()?.len());
57/// # Ok(())
58/// # }
59/// ```
60pub fn parse_ld_cache(input: &[u8]) -> Result<LdCache, CacheError> {
61    let original_input = input;
62    
63    if input.len() < 15 {  // Minimum for old format header
64        return Err(CacheError::TruncatedFile);
65    }
66
67    if input.starts_with(NEW_CACHE_MAGIC) && input.len() >= 20 && &input[17..20] == NEW_CACHE_VERSION {
68        parse_new_format_only(original_input)
69    } else if input.starts_with(OLD_CACHE_MAGIC) {
70        parse_old_format_with_possible_new(original_input)
71    } else {
72        Err(CacheError::InvalidMagic)
73    }
74}
75
76fn parse_old_format_with_possible_new(input: &[u8]) -> Result<LdCache, CacheError> {
77    let (remaining, old_cache) = parse_old_cache_header(input)
78        .map_err(|_| CacheError::ParseError("Failed to parse old cache header".to_string()))?;
79
80    let old_entries_size = old_cache.nlibs as usize * 12;
81    if remaining.len() < old_entries_size {
82        return Err(CacheError::TruncatedFile);
83    }
84
85    let (after_old_entries, old_entries) = nom::multi::count(parse_old_file_entry, old_cache.nlibs as usize).parse(remaining)
86        .map_err(|_| CacheError::ParseError("Failed to parse old entries".to_string()))?;
87
88    let old_cache_complete = OldCache {
89        nlibs: old_cache.nlibs,
90        entries: old_entries,
91    };
92
93    let aligned_offset = align_to_ptr_size(after_old_entries.as_ptr() as usize - input.as_ptr() as usize);
94    let padding_needed = aligned_offset - (after_old_entries.as_ptr() as usize - input.as_ptr() as usize);
95    
96    if after_old_entries.len() < padding_needed {
97        let string_table = after_old_entries.to_vec();
98        return Ok(LdCache {
99            old_format: Some(old_cache_complete),
100            new_format: None,
101            string_table,
102            string_table_offset: 0,  // Old format uses relative offsets
103        });
104    }
105
106    let after_padding = &after_old_entries[padding_needed..];
107    
108    if after_padding.len() >= 20 && after_padding.starts_with(NEW_CACHE_MAGIC) && &after_padding[17..20] == NEW_CACHE_VERSION {
109        let (string_table_start, new_cache) = parse_new_cache_header(after_padding)
110            .map_err(|_| CacheError::ParseError("Failed to parse new cache header".to_string()))?;
111
112        let new_entries_size = new_cache.nlibs as usize * 24;
113        if string_table_start.len() < new_entries_size {
114            return Err(CacheError::TruncatedFile);
115        }
116
117        let (string_table_bytes, new_entries) = nom::multi::count(parse_new_file_entry, new_cache.nlibs as usize).parse(string_table_start)
118            .map_err(|_| CacheError::ParseError("Failed to parse new entries".to_string()))?;
119
120        let extensions = if new_cache.extension_offset > 0 {
121            let ext_offset = new_cache.extension_offset as usize;
122            if ext_offset < input.len() {
123                parse_extension_directory(&input[ext_offset..]).ok().map(|(_, ext)| ext)
124            } else {
125                None
126            }
127        } else {
128            None
129        };
130
131        let new_cache_complete = NewCache {
132            nlibs: new_cache.nlibs,
133            len_strings: new_cache.len_strings,
134            flags: new_cache.flags,
135            extension_offset: new_cache.extension_offset,
136            entries: new_entries,
137            extensions,
138        };
139
140        let string_table = if new_cache.len_strings > 0 && string_table_bytes.len() >= new_cache.len_strings as usize {
141            string_table_bytes[..new_cache.len_strings as usize].to_vec()
142        } else {
143            string_table_bytes.to_vec()
144        };
145
146        Ok(LdCache {
147            old_format: Some(old_cache_complete),
148            new_format: Some(new_cache_complete),
149            string_table,
150            string_table_offset: 0,  // Old format uses relative offsets
151        })
152    } else {
153        let string_table = after_old_entries.to_vec();
154        Ok(LdCache {
155            old_format: Some(old_cache_complete),
156            new_format: None,
157            string_table,
158            string_table_offset: 0,  // Old format uses relative offsets
159        })
160    }
161}
162
163fn parse_new_format_only(input: &[u8]) -> Result<LdCache, CacheError> {
164    let (string_table_start, new_cache) = parse_new_cache_header(input)
165        .map_err(|_| CacheError::ParseError("Failed to parse new cache header".to_string()))?;
166
167    let new_entries_size = new_cache.nlibs as usize * 24;
168    if string_table_start.len() < new_entries_size {
169        return Err(CacheError::TruncatedFile);
170    }
171
172    let (string_table_bytes, new_entries) = nom::multi::count(parse_new_file_entry, new_cache.nlibs as usize).parse(string_table_start)
173        .map_err(|_| CacheError::ParseError("Failed to parse new entries".to_string()))?;
174    
175    // Calculate the absolute offset where the string table starts
176    let string_table_offset = input.len() - string_table_bytes.len();
177
178    let extensions = if new_cache.extension_offset > 0 {
179        let ext_offset = new_cache.extension_offset as usize;
180        if ext_offset < input.len() {
181            parse_extension_directory(&input[ext_offset..]).ok().map(|(_, ext)| ext)
182        } else {
183            None
184        }
185    } else {
186        None
187    };
188
189    let new_cache_complete = NewCache {
190        nlibs: new_cache.nlibs,
191        len_strings: new_cache.len_strings,
192        flags: new_cache.flags,
193        extension_offset: new_cache.extension_offset,
194        entries: new_entries,
195        extensions,
196    };
197
198    let string_table = if new_cache.len_strings > 0 && string_table_bytes.len() >= new_cache.len_strings as usize {
199        string_table_bytes[..new_cache.len_strings as usize].to_vec()
200    } else {
201        string_table_bytes.to_vec()
202    };
203
204    Ok(LdCache {
205        old_format: None,
206        new_format: Some(new_cache_complete),
207        string_table,
208        string_table_offset,
209    })
210}
211
212fn parse_old_cache_header(input: &[u8]) -> IResult<&[u8], OldCache> {
213    let (input, _magic) = tag(OLD_CACHE_MAGIC)(input)?;
214    let (input, nlibs) = le_u32(input)?;
215    
216    Ok((input, OldCache {
217        nlibs,
218        entries: Vec::new(),
219    }))
220}
221
222fn parse_old_file_entry(input: &[u8]) -> IResult<&[u8], OldFileEntry> {
223    let (input, (flags, key, value)) = (le_i32, le_u32, le_u32).parse(input)?;
224    
225    Ok((input, OldFileEntry { flags, key, value }))
226}
227
228fn parse_new_cache_header(input: &[u8]) -> IResult<&[u8], NewCache> {
229    let (input, _magic) = tag(NEW_CACHE_MAGIC)(input)?;
230    let (input, _version) = tag(NEW_CACHE_VERSION)(input)?;
231    let (input, nlibs) = le_u32(input)?;
232    let (input, len_strings) = le_u32(input)?;
233    let (input, flags) = u8(input)?;
234    let (input, _padding) = take(3usize)(input)?;
235    let (input, extension_offset) = le_u32(input)?;
236    let (input, _unused) = take(12usize)(input)?;
237    
238    Ok((input, NewCache {
239        nlibs,
240        len_strings,
241        flags,
242        extension_offset,
243        entries: Vec::new(),
244        extensions: None,
245    }))
246}
247
248fn parse_new_file_entry(input: &[u8]) -> IResult<&[u8], NewFileEntry> {
249    let (input, (flags, key, value, osversion_unused, hwcap)) = 
250        (le_i32, le_u32, le_u32, le_u32, le_u64).parse(input)?;
251    
252    Ok((input, NewFileEntry {
253        flags,
254        key,
255        value,
256        osversion_unused,
257        hwcap,
258    }))
259}
260
261fn parse_extension_directory(input: &[u8]) -> IResult<&[u8], ExtensionDirectory> {
262    let (input, magic) = le_u32(input)?;
263    if magic != EXTENSION_MAGIC {
264        return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag)));
265    }
266    
267    let (input, count) = le_u32(input)?;
268    let (input, sections) = nom::multi::count(parse_extension_section, count as usize).parse(input)?;
269    
270    Ok((input, ExtensionDirectory { count, sections }))
271}
272
273fn parse_extension_section(input: &[u8]) -> IResult<&[u8], ExtensionSection> {
274    let (input, (tag, flags, offset, size)) = (le_u32, le_u32, le_u32, le_u32).parse(input)?;
275    
276    Ok((input, ExtensionSection { tag, flags, offset, size }))
277}
278
/// Rounds `offset` up to the next multiple of the host pointer size
/// (`size_of::<usize>()`); values already on a boundary are returned unchanged.
fn align_to_ptr_size(offset: usize) -> usize {
    let ptr_size = std::mem::size_of::<usize>();
    match offset % ptr_size {
        0 => offset,
        remainder => offset + (ptr_size - remainder),
    }
}
283
/// Unit tests for the low-level record parsers and the alignment helper.
#[cfg(test)]
mod tests {
    use super::*;

    /// A 12-byte old-format entry decodes into its three fields with no leftover input.
    #[test]
    fn test_parse_old_file_entry() {
        let data = [
            1, 0, 0, 0,       // flags = 1
            0x10, 0, 0, 0,    // key = 16
            0x20, 0, 0, 0,    // value = 32
        ];

        let (remaining, entry) = parse_old_file_entry(&data).unwrap();
        assert!(remaining.is_empty());
        assert_eq!(entry.flags, 1);
        assert_eq!(entry.key, 16);
        assert_eq!(entry.value, 32);
    }

    /// A 24-byte new-format entry decodes into its five fields with no leftover input.
    #[test]
    fn test_parse_new_file_entry() {
        let data = [
            1, 0, 0, 0,                           // flags = 1
            0x10, 0, 0, 0,                        // key = 16
            0x20, 0, 0, 0,                        // value = 32
            0, 0, 0, 0,                           // osversion_unused = 0
            0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0,   // hwcap = 0x12345678
        ];

        let (remaining, entry) = parse_new_file_entry(&data).unwrap();
        assert!(remaining.is_empty());
        assert_eq!(entry.flags, 1);
        assert_eq!(entry.key, 16);
        assert_eq!(entry.value, 32);
        assert_eq!(entry.osversion_unused, 0);
        assert_eq!(entry.hwcap, 0x1234_5678);
    }

    /// A 16-byte extension section record decodes into its four fields.
    #[test]
    fn test_parse_extension_section() {
        let data = [
            1, 0, 0, 0,     // tag = 1
            0, 0, 0, 0,     // flags = 0
            0x40, 0, 0, 0,  // offset = 64
            0x20, 0, 0, 0,  // size = 32
        ];

        let (remaining, section) = parse_extension_section(&data).unwrap();
        assert!(remaining.is_empty());
        assert_eq!(section.tag, 1);
        assert_eq!(section.flags, 0);
        assert_eq!(section.offset, 64);
        assert_eq!(section.size, 32);
    }

    /// The directory parser accepts the expected magic and rejects any other.
    #[test]
    fn test_parse_extension_directory_magic() {
        let mut good = EXTENSION_MAGIC.to_le_bytes().to_vec();
        good.extend_from_slice(&[0, 0, 0, 0]); // count = 0

        let (remaining, directory) = parse_extension_directory(&good).unwrap();
        assert!(remaining.is_empty());
        assert_eq!(directory.count, 0);
        assert!(directory.sections.is_empty());

        let mut bad = (!EXTENSION_MAGIC).to_le_bytes().to_vec();
        bad.extend_from_slice(&[0, 0, 0, 0]);
        assert!(parse_extension_directory(&bad).is_err());
    }

    /// Alignment rounds up to the host pointer size. The expectations are
    /// expressed in terms of that size so the test is meaningful on every target,
    /// not only 64-bit ones.
    #[test]
    fn test_align_to_ptr_size() {
        let ptr_size = std::mem::size_of::<usize>();

        assert_eq!(align_to_ptr_size(0), 0);
        assert_eq!(align_to_ptr_size(1), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size - 1), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size + 1), 2 * ptr_size);
    }

    /// The old header is the magic followed directly by the entry count.
    #[test]
    fn test_old_cache_magic() {
        let mut data = OLD_CACHE_MAGIC.to_vec();
        data.extend_from_slice(&[2, 0, 0, 0]); // nlibs = 2

        let (remaining, cache) = parse_old_cache_header(&data).unwrap();
        assert_eq!(cache.nlibs, 2);
        assert!(remaining.is_empty());
    }

    /// The new header is magic + version followed by the fixed-size field block.
    #[test]
    fn test_new_cache_magic() {
        let mut data = NEW_CACHE_MAGIC.to_vec();
        data.extend_from_slice(NEW_CACHE_VERSION);
        data.extend_from_slice(&[
            2, 0, 0, 0,    // nlibs = 2
            100, 0, 0, 0,  // len_strings = 100
            2,             // flags = 2 (little endian)
            0, 0, 0,       // padding
            0, 0, 0, 0,    // extension_offset = 0
            0, 0, 0, 0,    // unused[0]
            0, 0, 0, 0,    // unused[1]
            0, 0, 0, 0,    // unused[2]
        ]);

        let (remaining, cache) = parse_new_cache_header(&data).unwrap();
        assert_eq!(cache.nlibs, 2);
        assert_eq!(cache.len_strings, 100);
        assert_eq!(cache.flags, 2);
        assert_eq!(cache.extension_offset, 0);
        assert!(remaining.is_empty());
    }
}