ldcache_rs/
lib.rs

//! Library to parse ld.so.cache according to the numerous formats defined in glibc
//!
//! This library doesn't use any C bindings, nor does it try to create a structure to match
//! over the data; it uses plain parsing and thus allows the endianness to be specified.
//! As parsing ld.so.cache is useful to get access to the symbols, the parsing is done
//! strictly as well as quickly.
//!
//! If you want to make a dirt cheap parser for ld.so.cache a simple call to
//! `strings /etc/ld.so.cache` will do.
10
11use std::collections::hash_map::Iter;
12use std::collections::HashMap;
13use std::convert::TryFrom;
14use std::fs::File;
15use std::io::Read;
16
/// Default location of the dynamic linker cache file.
pub const LD_SO_CACHE: &str = "/etc/ld.so.cache";
/// Magic string that opens an old-format cache.
pub const OLD_HEADER: &str = "ld.so-1.7.0";
/// Version string reported for old-format caches.
pub const OLD_VERSION: &str = "1.7.0";
/// Padding bytes after the old magic: 11 + 1 = 12 bytes, i.e. 3 x 4-byte words.
pub const PADDING_OLD: usize = 1;
/// Magic string that opens a new-format cache.
pub const HEADER: &str = "glibc-ld.so.cache";
/// Version string expected right after the new magic.
pub const CURRENT_VERSION: &str = "1.1";
/// Number of bytes occupied by the version string in the new format.
pub const VERSION_SIZE: usize = 3;
/// Padding bytes after magic + version: 17 + 3 + 0 = 20 bytes, i.e. 5 x 4-byte words.
pub const PADDING_NEW: usize = 0;
/// Length in bytes of the longer of the two magic strings.
pub const HEADER_LEN: usize = {
    let old_len = OLD_HEADER.len();
    let new_len = HEADER.len();
    if new_len >= old_len {
        new_len
    } else {
        old_len
    }
};

// Characters in the cache are treated as single bytes (`u8`), not as Rust's Unicode `char`.
/// Size in bytes of a `u8`.
pub const U8_SIZE: usize = std::mem::size_of::<u8>();
/// Size in bytes of a `u32`.
pub const U32_SIZE: usize = std::mem::size_of::<u32>();
/// Size in bytes of a `u64`.
pub const U64_SIZE: usize = std::mem::size_of::<u64>();
37
38#[derive(Clone, Debug, Default)]
39pub struct Cache {
40    /// Boolean to say what type of parser should be used
41    _is_old: bool,
42    /// Cache version as parsed from ld.so.cache (usually 1.1 for the new one)
43    pub version: String,
44    /// number of entries in the cache (parsed from the ld.so.cache)
45    pub count: u32,
46    /// string table length
47    pub strlen: Option<u32>,
48    /// flags for endianness (as of 2.33) values are
49    ///     0: Not Set
50    ///     1: Invalid
51    ///     2: Little
52    ///     3: Big
53    pub flags: Option<u8>,
54    /// File offset of the extension directory (as of 2.33)
55    pub extension_offset: Option<u32>,
56    /// list of entries, we use a hashmap as the use case
57    /// is more to retrieve a path from a lib name
58    pub entries: HashMap<String, Entry>,
59}
60
/// One library of the cache, grouping every record that shares the same library name.
///
/// The scalar fields come from the first record seen for the name; further records
/// with the same name only append to `libname` and `path`.
#[derive(Clone, Debug, Default)]
pub struct Entry {
    /// Entry flags as stored in the file (glibc documents 1 as "ELF library")
    pub flags: i32,
    /// Library name, repeated once per record found for it
    pub libname: Vec<String>,
    /// Path of the library, one per record found for the name
    pub path: Vec<String>,
    /// String table index of the library name
    key: u32,
    /// String table index of the library path
    value: u32,
    /// Required OS version; only present in the new layout
    pub os_version: Option<u32>,
    /// Hwcap value; only present in the new layout
    pub hwcap: Option<u64>,
}
71
72#[derive(Debug, thiserror::Error)]
73pub enum CacheError {
74    #[error(transparent)]
75    IO(#[from] std::io::Error),
76    #[error("Invalid header (expected {expected:?}, found {found:?})")]
77    InvalidHeader { expected: String, found: String },
78    #[error("Invalid size for the data, should be at least {0}")]
79    InvalidSize(usize),
80    #[error("Missing a slice at index {0}")]
81    MissingSlice(u32),
82    #[error(
83        "Invalid string at index {index:?} for stream starting with {stream:?} with {error:?}"
84    )]
85    InvalidString {
86        index: usize,
87        stream: Vec<u8>,
88        error: std::str::Utf8Error,
89    },
90    #[error("Unknown")]
91    Unknown,
92}
93
/// Byte order used to decode the integers stored in the cache.
///
/// Derives `PartialEq`, `Eq` and `Hash` so that callers can compare a requested
/// byte order or use it as a map key.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum TargetEndian {
    /// Byte order of the machine running the parser
    Native,
    /// Most significant byte first
    Big,
    /// Least significant byte first
    Little,
}
100
/// The cache has two possible interpretations
102/// ```c
103/// struct file_entry
104/// {
105///   int flags;		/* This is 1 for an ELF library.  */
106///   unsigned int key, value; /* String table indices.  */
107/// };
108///
109/// struct cache_file
110/// {
111///   char magic[sizeof CACHEMAGIC - 1];
112///   unsigned int nlibs;
113///   struct file_entry libs[0];
114/// };
115///
116/// #define CACHEMAGIC_NEW "glibc-ld.so.cache"
117/// #define CACHE_VERSION "1.1"
118/// #define CACHEMAGIC_VERSION_NEW CACHEMAGIC_NEW CACHE_VERSION
119///
120///
121/// struct file_entry_new
122/// {
123///   int32_t flags;		/* This is 1 for an ELF library.  */
124///   uint32_t key, value;		/* String table indices.  */
125///   uint32_t osversion;		/* Required OS version.	 */
126///   uint64_t hwcap;		/* Hwcap entry.	 */
127/// };
128///
129/// struct cache_file_new
130/// {
131///   char magic[sizeof CACHEMAGIC_NEW - 1];
132///   char version[sizeof CACHE_VERSION - 1];
133///   uint32_t nlibs;		/* Number of entries.  */
134///   uint32_t len_strings;		/* Size of string table. */
135///   uint32_t unused[5];		/* Leave space for future extensions and align to 8 byte boundary.  */
136///   struct file_entry_new libs[0]; /* Entries describing libraries.  */
137///   /* After this the string table of size len_strings is found.	*/
138/// };
139/// ```
/// as of 2.33 we now use 2 of the unused
141/// ```c
142/// struct cache_file_new {
143///   char magic[sizeof CACHEMAGIC_NEW - 1];
144///   char version[sizeof CACHE_VERSION - 1];
145///   uint32_t nlibs;		/* Number of entries.  */
146///   uint32_t len_strings;		/* Size of string table. */
147///
148///   /* flags & cache_file_new_flags_endian_mask is one of the values
149///      cache_file_new_flags_endian_unset, cache_file_new_flags_endian_invalid,
150///      cache_file_new_flags_endian_little, cache_file_new_flags_endian_big.
151///
152///      The remaining bits are unused and should be generated as zero and
153///      ignored by readers.  */
154///   uint8_t flags;
155///
156///   uint8_t padding_unsed[3];	/* Not used, for future extensions.  */
157///
158///   /* File offset of the extension directory.  See struct
159///      cache_extension below.  Must be a multiple of four.  */
160///   uint32_t extension_offset;
161///
162///   uint32_t unused[3];		/* Leave space for future extensions
163/// 				   and align to 8 byte boundary.  */
164///   struct file_entry_new libs[0]; /* Entries describing libraries.  */
165///   /* After this the string table of size len_strings is found.	*/
166/// };
167///```
168/// As a side note, 5 was chosen because you have len_strings which was added compared to
169/// the usual format so (5+1)*4=24 bytes or 3*8 bytes.
170impl Cache {
171    pub fn new() -> Result<Cache, CacheError> {
172        let mut file = File::open(LD_SO_CACHE)?;
173        let mut buf: Vec<u8> = Vec::new();
174        if file.read_to_end(&mut buf)? == 0 {
175            return Err(CacheError::InvalidSize(HEADER_LEN));
176        }
177        Self::parse(&*buf, TargetEndian::Native)
178    }
179
180    pub fn parse(buf: &[u8], endianness: TargetEndian) -> Result<Cache, CacheError> {
181        let (mut cache, index) = Self::parse_struct(buf, endianness)?;
182        cache.parse_entries(buf, index, endianness)?;
183        Ok(cache)
184    }
185
186    fn parse_entries(
187        &mut self,
188        buf: &[u8],
189        mut index: usize,
190        endianness: TargetEndian,
191    ) -> Result<(), CacheError> {
192        let n = if self._is_old { 3 } else { 5 };
193        if buf.len() < n * U32_SIZE * self.count as usize {
194            return Err(CacheError::InvalidSize(n * U32_SIZE * self.count as usize));
195        }
196        let start_header = index + self.count as usize * U32_SIZE * n;
197        for _ in 0..self.count {
198            let flags: i32 = Self::read_i32(Self::get_next_4b_slice(buf, index, 0)?, &endianness);
199            let key: u32 = Self::read_u32(Self::get_next_4b_slice(buf, index, 1)?, &endianness);
200            let value: u32 = Self::read_u32(Self::get_next_4b_slice(buf, index, 2)?, &endianness);
201            index += U32_SIZE * 3;
202            let mut os_version: Option<u32> = None;
203            let mut hwcap: Option<u64> = None;
204            if !self._is_old {
205                os_version = Some(Self::read_u32(
206                    Self::get_next_4b_slice(buf, index, 0)?,
207                    &endianness,
208                ));
209                hwcap = Some(Self::read_u64(
210                    Self::get_next_8b_slice(buf, index + U32_SIZE, 0)?,
211                    &endianness,
212                ));
213                index += U32_SIZE * 3;
214            }
215            let start = key as usize + if self._is_old { start_header } else { 0 };
216            let libname = Self::str_from_u8_nul_utf8(&buf[start..]).map_err(|e| {
217                CacheError::InvalidString {
218                    index: start,
219                    stream: Vec::from(&buf[start..std::cmp::min(buf.len(), start + 10)]),
220                    error: e,
221                }
222            })?;
223            let start = value as usize + if self._is_old { start_header } else { 0 };
224            let path = Self::str_from_u8_nul_utf8(&buf[start..]).map_err(|e| {
225                CacheError::InvalidString {
226                    index: start,
227                    stream: Vec::from(&buf[start..std::cmp::min(buf.len(), start + 10)]),
228                    error: e,
229                }
230            })?;
231            self.entries
232                .entry(libname.to_string())
233                .and_modify(|e| {
234                    e.libname.push(libname.to_string());
235                    e.path.push(path.to_string());
236                })
237                .or_insert(Entry {
238                    flags,
239                    libname: vec![libname.to_string()],
240                    path: vec![path.to_string()],
241                    key,
242                    value,
243                    os_version,
244                    hwcap,
245                });
246        }
247        Ok(())
248    }
249
250    pub fn str_from_u8_nul_utf8(utf8_src: &[u8]) -> Result<&str, std::str::Utf8Error> {
251        let nul_range_end = utf8_src
252            .iter()
253            .position(|&c| c == b'\0')
254            .unwrap_or(utf8_src.len()); // default to length if no `\0` present
255        ::std::str::from_utf8(&utf8_src[0..nul_range_end])
256    }
257
258    fn get_next_4b_slice(buf: &[u8], mut index: usize, offset: usize) -> Result<&[u8], CacheError> {
259        index = index + offset * U32_SIZE;
260        if buf.len() < index + U32_SIZE {
261            return Err(CacheError::MissingSlice(index as u32));
262        }
263        Ok(&buf[index..index + U32_SIZE])
264    }
265
266    fn get_next_8b_slice(buf: &[u8], mut index: usize, offset: usize) -> Result<&[u8], CacheError> {
267        index = index + offset * U64_SIZE;
268        if buf.len() < index + U64_SIZE {
269            return Err(CacheError::MissingSlice(index as u32));
270        }
271        Ok(&buf[index..index + U64_SIZE])
272    }
273
274    fn parse_struct(buf: &[u8], endianness: TargetEndian) -> Result<(Cache, usize), CacheError> {
275        // parse header to know applicable logic
276        let is_old: bool = Self::parse_header(&buf[..HEADER_LEN * U8_SIZE])?;
277        let mut index: usize = if is_old {
278            OLD_HEADER.len() * U8_SIZE
279        } else {
280            HEADER.len() * U8_SIZE
281        };
282        // assert length is enough to parse the full struct
283        if is_old {
284            if buf.len() < index + PADDING_OLD + U32_SIZE {
285                return Err(CacheError::InvalidSize(index + U32_SIZE));
286            }
287        } else {
288            if buf.len() < index + PADDING_NEW + VERSION_SIZE + U32_SIZE * 7 {
289                return Err(CacheError::InvalidSize(
290                    index + PADDING_NEW + VERSION_SIZE + U32_SIZE * 7,
291                ));
292            }
293        }
294
295        // parse version
296        let version: &str = if !is_old {
297            // parse the version numbers
298            let version = &buf[index..index + VERSION_SIZE];
299            index += VERSION_SIZE;
300            index += PADDING_NEW;
301            std::str::from_utf8(version).map_err(|e| CacheError::InvalidHeader {
302                expected: format!("A correct version like {}", CURRENT_VERSION),
303                found: format!("{:?} with error {}", version, e.to_string()),
304            })?
305        } else {
306            index += PADDING_OLD;
307            OLD_VERSION
308        };
309        // parse libraries count
310        let count: u32 = Self::read_u32(&buf[index..index + U32_SIZE], &endianness);
311        index += U32_SIZE;
312
313        let mut strlen: Option<u32> = None;
314        let mut flags: Option<u8> = None;
315        let mut extension_offset: Option<u32> = None;
316        if !is_old {
317            // parse string table length
318            strlen = Some(Self::read_u32(&buf[index..index + U32_SIZE], &endianness));
319            index += U32_SIZE;
320            flags = Some(*&buf[index]);
321            // the 3 u8 here are unused
322            index += U32_SIZE;
323            extension_offset = Some(Self::read_u32(&buf[index..index + U32_SIZE], &endianness));
324            index += U32_SIZE;
325            for _ in 2..5 {
326                let unused: u32 = Self::read_u32(&buf[index..index + U32_SIZE], &endianness);
327                index += U32_SIZE;
328                if unused != 0 {
329                    eprintln!("format was slightly changed, we advise you to open an issue or look over your data.")
330                }
331            }
332        }
333        Ok((
334            Cache {
335                _is_old: is_old,
336                version: version.to_string(),
337                count,
338                strlen,
339                flags,
340                extension_offset,
341                entries: HashMap::with_capacity(count as usize),
342            },
343            index,
344        ))
345    }
346
347    fn read_u32(buf: &[u8], endianness: &TargetEndian) -> u32 {
348        let buf: [u8; 4] = <[u8; 4]>::try_from(&buf[..4]).unwrap();
349        match endianness {
350            TargetEndian::Native => u32::from_ne_bytes(buf),
351            TargetEndian::Big => u32::from_be_bytes(buf),
352            TargetEndian::Little => u32::from_le_bytes(buf),
353        }
354    }
355
356    fn read_i32(buf: &[u8], endianness: &TargetEndian) -> i32 {
357        let buf: [u8; 4] = <[u8; 4]>::try_from(&buf[..4]).unwrap();
358        match endianness {
359            TargetEndian::Native => i32::from_ne_bytes(buf),
360            TargetEndian::Big => i32::from_be_bytes(buf),
361            TargetEndian::Little => i32::from_le_bytes(buf),
362        }
363    }
364
365    fn read_u64(buf: &[u8], endianness: &TargetEndian) -> u64 {
366        let buf: [u8; 8] = <[u8; 8]>::try_from(&buf[..8]).unwrap();
367        match endianness {
368            TargetEndian::Native => u64::from_ne_bytes(buf),
369            TargetEndian::Big => u64::from_be_bytes(buf),
370            TargetEndian::Little => u64::from_le_bytes(buf),
371        }
372    }
373
374    fn parse_header(buf: &[u8]) -> Result<bool, CacheError> {
375        let mut is_old: bool = false;
376        if let Ok(header) = std::str::from_utf8(&buf[..HEADER.len() * U8_SIZE]) {
377            if header != HEADER {
378                // technically here we should be fixed that the header is incorrect
379                // since the old_header used to have a non parseable character inside the range
380                is_old = true;
381            }
382        } else {
383            is_old = true;
384        }
385        if is_old {
386            let old_header: &str = std::str::from_utf8(&buf[..OLD_HEADER.len() * U8_SIZE])
387                .map_err(|e| CacheError::InvalidHeader {
388                    expected: OLD_HEADER.to_string(),
389                    found: e.to_string(),
390                })?;
391            if old_header != OLD_HEADER {
392                return Err(CacheError::InvalidHeader {
393                    expected: format!("Either {} or {}", HEADER, OLD_HEADER),
394                    found: old_header.to_string(),
395                });
396            }
397        }
398        Ok(is_old)
399    }
400
401    /// Utility function, does the contains check on the entries with the full lib name
402    pub fn contains(&self, key: &str) -> bool {
403        self.entries.contains_key(key)
404    }
405
406    /// Utility function, get the entry based on the full lib name
407    pub fn get(&self, key: &str) -> Option<&Entry> {
408        self.entries.get(key)
409    }
410
411    /// Utility function, get the paths of the lib based on the full lib name
412    pub fn get_paths(&self, key: &str) -> Option<Vec<&str>> {
413        self.get(key)
414            .map(|e| e.path.iter().map(|e| e.as_str()).collect())
415    }
416
417    /// Utility function, get the first path of the lib based on the full lib name
418    pub fn get_path(&self, key: &str) -> Option<&str> {
419        self.get(key)
420            .and_then(|e| e.path.first().map(|x| x.as_str()))
421    }
422
423    /// Utility function, create an iterator over the entries
424    pub fn iter(&self) -> Iter<'_, String, Entry> {
425        self.entries.iter()
426    }
427
428    /// Utility function, return a boolean indicating if there is a partial match
429    /// As this utility will iterate over all elements, if you need the element please
430    /// use get_partial or get_path_partial
431    pub fn contains_partial(&self, key: &str) -> bool {
432        self.iter().any(|e| e.0.contains(key))
433    }
434
435    /// Utility function, return the first element that contains the key inside the full lib
436    /// name (partial match)
437    pub fn get_partial(&self, key: &str) -> Option<&Entry> {
438        self.iter().find(|&e| e.0.contains(key)).map(|e| e.1)
439    }
440
441    /// Utility function, return the first lib paths for which the full lib
442    /// name contains the key (partial match)
443    pub fn get_paths_partial(&self, key: &str) -> Option<Vec<&str>> {
444        self.get_partial(key)
445            .map(|e| e.path.iter().map(|e| e.as_str()).collect())
446    }
447
448    /// Utility function, return the first lib path for which the full lib
449    /// name contains the key (partial match)
450    pub fn get_path_partial(&self, key: &str) -> Option<&str> {
451        self.get_partial(key)
452            .and_then(|e| e.path.first().map(|x| x.as_str()))
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a fixture with the given byte order and asserts that parsing succeeded.
    fn parse_fixture(data: &[u8], endianness: TargetEndian) -> Cache {
        let parsed = Cache::parse(data, endianness);
        assert!(parsed.is_ok());
        parsed.unwrap()
    }

    /// Checks every `<lib name> <path>` line of `strings` against the parsed cache.
    fn test_entries(strings: &str, cache: Cache) {
        let expected = strings
            .split_terminator('\n')
            .map(|line| line.split_once(' ').unwrap());
        for (lib_name, path) in expected {
            assert!(
                cache.contains(lib_name),
                "lib name : {} was not inside the entries",
                lib_name
            );
            let found_paths = cache.get_paths(lib_name);
            assert!(
                found_paths.is_some(),
                "Path was not found for lib name : {}",
                lib_name
            );
            let found_paths = found_paths.unwrap();
            assert!(
                found_paths.contains(&path),
                "lib path : {} was not correct, got {:?}",
                path,
                found_paths
            );
        }
    }

    #[test]
    fn test_big_endian_old_format_s390x() {
        let cache = parse_fixture(
            include_bytes!("../tests/ld.so.cache_s390x_old"),
            TargetEndian::Big,
        );
        assert_eq!(cache.count, 188);
        assert_eq!(cache.version, OLD_VERSION);
        assert!(cache._is_old);
        // the old layout carries none of the extended header fields
        assert_eq!(cache.strlen, None);
        assert_eq!(cache.flags, None);
        assert_eq!(cache.extension_offset, None);
        test_entries(include_str!("../tests/s390x.strings"), cache);
    }

    #[test]
    fn test_little_endian_new_format_mips() {
        let cache = parse_fixture(
            include_bytes!("../tests/ld.so.cache_mips"),
            TargetEndian::Little,
        );
        assert_eq!(cache.count, 2407);
        assert_eq!(cache.version, CURRENT_VERSION);
        assert!(!cache._is_old);
        assert_eq!(cache.strlen, Some(60915));
        assert_eq!(cache.flags, Some(2)); // little endian
        assert_eq!(cache.extension_offset, Some(118732));
        test_entries(include_str!("../tests/mips.strings"), cache);
    }

    #[test]
    fn test_little_endian_new_format_debian_x86_64() {
        let cache = parse_fixture(
            include_bytes!("../tests/ld.so.cache_debian"),
            TargetEndian::Little,
        );
        assert_eq!(cache.count, 81);
        assert_eq!(cache.version, CURRENT_VERSION);
        assert!(!cache._is_old);
        assert_eq!(cache.strlen, Some(4188));
        assert_eq!(cache.flags, Some(2)); // little endian
        assert_eq!(cache.extension_offset, Some(0));
        test_entries(include_str!("../tests/debian.strings"), cache);
    }

    #[test]
    fn test_little_endian_old_format_debian_x86_64() {
        let cache = parse_fixture(
            include_bytes!("../tests/ld.so.cache_debian_old"),
            TargetEndian::Little,
        );
        assert_eq!(cache.count, 148);
        assert_eq!(cache.version, OLD_VERSION);
        assert!(cache._is_old);
        // the old layout carries none of the extended header fields
        assert_eq!(cache.strlen, None);
        assert_eq!(cache.flags, None);
        assert_eq!(cache.extension_offset, None);
        test_entries(include_str!("../tests/debian_old.strings"), cache);
    }
}