// ld-so-cache 0.1.0
//
// A parser for glibc ld.so.cache files
// Documentation
//! Binary parsers for ld.so.cache file formats.
//!
//! This module contains the core parsing logic using the nom parser combinator library.
//! It handles both the legacy ld.so-1.7.0 format and the modern glibc format with
//! hardware capabilities and extensions.
//!
//! The parsing is designed to be resilient to malformed data and will attempt to
//! extract as much information as possible even from partially corrupted files.

use nom::{
    bytes::complete::{tag, take},
    number::complete::{le_u32, le_u64, le_i32, u8},
    Parser,
    IResult,
};

use crate::{
    LdCache, OldCache, NewCache, OldFileEntry, NewFileEntry,
    ExtensionDirectory, ExtensionSection, CacheError,
};

/// Magic bytes a legacy-format cache must start with (matched by `parse_old_cache_header`).
const OLD_CACHE_MAGIC: &[u8] = b"ld.so-1.7.0";
/// Magic bytes a new-format cache header must start with (matched by `parse_new_cache_header`).
const NEW_CACHE_MAGIC: &[u8] = b"glibc-ld.so.cache";
/// Version bytes expected immediately after `NEW_CACHE_MAGIC` (bytes 17..20 of the new header).
const NEW_CACHE_VERSION: &[u8] = b"1.1";
/// Value the leading little-endian `u32` of an extension directory must equal.
const EXTENSION_MAGIC: u32 = 0xEA8D_4E78;

/// Parses the raw bytes of an ld.so.cache file.
///
/// This is the top-level entry point of the parser module. The leading magic bytes
/// select the layout to parse:
///
/// * `glibc-ld.so.cache` followed by the version `1.1` - new format only
/// * `ld.so-1.7.0` - legacy format, optionally followed by an embedded new-format cache
///
/// # Arguments
///
/// * `input` - Raw bytes of the cache file
///
/// # Returns
///
/// A parsed `LdCache` structure holding whichever formats were found in `input`.
///
/// # Errors
///
/// * `CacheError::TruncatedFile` - `input` is shorter than 15 bytes, or ends before the
///   entry table announced by its header
/// * `CacheError::InvalidMagic` - `input` starts with neither recognized magic
/// * `CacheError::ParseError` - a header or entry table could not be decoded
///
/// # Examples
///
/// ```rust
/// use ld_so_cache::parsers::parse_ld_cache;
/// use std::fs;
///
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let data = fs::read("/etc/ld.so.cache")?;
/// let cache = parse_ld_cache(&data)?;
///
/// println!("Found {} library entries", cache.get_entries()?.len());
/// # Ok(())
/// # }
/// ```
pub fn parse_ld_cache(input: &[u8]) -> Result<LdCache, CacheError> {
    // Smallest accepted input: the 11-byte legacy magic plus its 4-byte library count.
    const MIN_INPUT_LEN: usize = 15;

    if input.len() < MIN_INPUT_LEN {
        return Err(CacheError::TruncatedFile);
    }

    // The version bytes sit directly behind the new-format magic; `get` yields `None`
    // when the input is too short to contain them.
    let version_start = NEW_CACHE_MAGIC.len();
    let version_end = version_start + NEW_CACHE_VERSION.len();
    let has_new_version = input.get(version_start..version_end) == Some(NEW_CACHE_VERSION);

    if input.starts_with(NEW_CACHE_MAGIC) && has_new_version {
        return parse_new_format_only(input);
    }
    if input.starts_with(OLD_CACHE_MAGIC) {
        return parse_old_format_with_possible_new(input);
    }
    Err(CacheError::InvalidMagic)
}

/// Parses a file that starts with the legacy `ld.so-1.7.0` header and, when present,
/// the new-format cache embedded after the legacy entry table.
///
/// After the legacy entries, the offset (measured from the start of `input`) is rounded
/// up with `align_to_ptr_size`; if the bytes at that position carry the new-format magic
/// and version, the embedded cache is parsed as well. Otherwise only the legacy data is
/// returned and everything after the legacy entries is kept as the string table.
///
/// # Errors
///
/// * `CacheError::ParseError` - a header or entry table could not be decoded
/// * `CacheError::TruncatedFile` - an entry table announced by a header does not fit in
///   the remaining input (including counts so large their byte size overflows `usize`)
fn parse_old_format_with_possible_new(input: &[u8]) -> Result<LdCache, CacheError> {
    // Encoded size of one legacy entry: flags (i32) + key (u32) + value (u32).
    const OLD_ENTRY_SIZE: usize = 12;
    // Encoded size of one new-format entry: flags, key, value, osversion (4 bytes each) + hwcap (8).
    const NEW_ENTRY_SIZE: usize = 24;

    let (remaining, old_header) = parse_old_cache_header(input)
        .map_err(|_| CacheError::ParseError("Failed to parse old cache header".to_string()))?;

    // `nlibs` comes straight from the file; a checked multiply keeps a hostile count from
    // wrapping (release) or panicking (debug) on targets where `usize` is 32 bits wide.
    let old_count = old_header.nlibs as usize;
    let old_entries_size = old_count
        .checked_mul(OLD_ENTRY_SIZE)
        .ok_or(CacheError::TruncatedFile)?;
    if remaining.len() < old_entries_size {
        return Err(CacheError::TruncatedFile);
    }

    let (after_old_entries, old_entries) = nom::multi::count(parse_old_file_entry, old_count)
        .parse(remaining)
        .map_err(|_| CacheError::ParseError("Failed to parse old entries".to_string()))?;

    let old_cache_complete = OldCache {
        nlibs: old_header.nlibs,
        entries: old_entries,
    };

    // `after_old_entries` is a suffix of `input`, so the consumed byte count is simply
    // the difference of the two lengths.
    let entries_end = input.len() - after_old_entries.len();
    let padding_needed = align_to_ptr_size(entries_end) - entries_end;

    // The embedded cache exists only if, after the alignment padding, the bytes start
    // with the new-format magic immediately followed by the expected version.
    let version_start = NEW_CACHE_MAGIC.len();
    let version_end = version_start + NEW_CACHE_VERSION.len();
    let embedded_new = after_old_entries.get(padding_needed..).filter(|region| {
        region.starts_with(NEW_CACHE_MAGIC)
            && region.get(version_start..version_end) == Some(NEW_CACHE_VERSION)
    });

    let new_region = match embedded_new {
        Some(region) => region,
        None => {
            // Legacy-only file: everything after the entries serves as the string table.
            return Ok(LdCache {
                old_format: Some(old_cache_complete),
                new_format: None,
                string_table: after_old_entries.to_vec(),
                string_table_offset: 0, // Old format uses relative offsets
            });
        }
    };

    let (string_table_start, new_header) = parse_new_cache_header(new_region)
        .map_err(|_| CacheError::ParseError("Failed to parse new cache header".to_string()))?;

    let new_count = new_header.nlibs as usize;
    let new_entries_size = new_count
        .checked_mul(NEW_ENTRY_SIZE)
        .ok_or(CacheError::TruncatedFile)?;
    if string_table_start.len() < new_entries_size {
        return Err(CacheError::TruncatedFile);
    }

    let (string_table_bytes, new_entries) = nom::multi::count(parse_new_file_entry, new_count)
        .parse(string_table_start)
        .map_err(|_| CacheError::ParseError("Failed to parse new entries".to_string()))?;

    // The extension offset is applied to the whole file, not to the embedded region.
    // A missing, out-of-range, or undecodable directory is tolerated and yields `None`.
    let extensions = match new_header.extension_offset as usize {
        0 => None,
        ext_offset => input
            .get(ext_offset..)
            .and_then(|directory| parse_extension_directory(directory).ok())
            .map(|(_, ext)| ext),
    };

    // Honor the declared string-table length when it is non-zero and fits; otherwise
    // keep every byte that follows the entries.
    let len_strings = new_header.len_strings as usize;
    let string_table = match string_table_bytes.get(..len_strings) {
        Some(declared) if len_strings > 0 => declared.to_vec(),
        _ => string_table_bytes.to_vec(),
    };

    let new_cache_complete = NewCache {
        nlibs: new_header.nlibs,
        len_strings: new_header.len_strings,
        flags: new_header.flags,
        extension_offset: new_header.extension_offset,
        entries: new_entries,
        extensions,
    };

    // NOTE(review): the string table returned here starts after the new-format entries,
    // yet the offset is reported as 0 - confirm that consumers of `string_table_offset`
    // resolve entry keys correctly for combined files.
    Ok(LdCache {
        old_format: Some(old_cache_complete),
        new_format: Some(new_cache_complete),
        string_table,
        string_table_offset: 0, // Old format uses relative offsets
    })
}

/// Parses a cache file that consists solely of the new `glibc-ld.so.cache` format.
///
/// Reads the header, then `nlibs` fixed-size entries, and treats the bytes after the
/// entries as the string table. `string_table_offset` in the result is the absolute
/// position of those bytes within `input`.
///
/// # Errors
///
/// * `CacheError::ParseError` - the header or the entry table could not be decoded
/// * `CacheError::TruncatedFile` - the entry table announced by the header does not fit
///   in the remaining input (including counts whose byte size overflows `usize`)
fn parse_new_format_only(input: &[u8]) -> Result<LdCache, CacheError> {
    // Encoded size of one new-format entry: flags, key, value, osversion (4 bytes each) + hwcap (8).
    const NEW_ENTRY_SIZE: usize = 24;

    let (string_table_start, new_cache) = parse_new_cache_header(input)
        .map_err(|_| CacheError::ParseError("Failed to parse new cache header".to_string()))?;

    // `nlibs` comes straight from the file; a checked multiply keeps a hostile count from
    // wrapping (release) or panicking (debug) on targets where `usize` is 32 bits wide.
    let entry_count = new_cache.nlibs as usize;
    let new_entries_size = entry_count
        .checked_mul(NEW_ENTRY_SIZE)
        .ok_or(CacheError::TruncatedFile)?;
    if string_table_start.len() < new_entries_size {
        return Err(CacheError::TruncatedFile);
    }

    let (string_table_bytes, new_entries) = nom::multi::count(parse_new_file_entry, entry_count)
        .parse(string_table_start)
        .map_err(|_| CacheError::ParseError("Failed to parse new entries".to_string()))?;

    // Calculate the absolute offset where the string table starts
    let string_table_offset = input.len() - string_table_bytes.len();

    // A missing, out-of-range, or undecodable extension directory is tolerated and
    // simply yields `None`.
    let extensions = match new_cache.extension_offset as usize {
        0 => None,
        ext_offset => input
            .get(ext_offset..)
            .and_then(|directory| parse_extension_directory(directory).ok())
            .map(|(_, ext)| ext),
    };

    // Honor the declared string-table length when it is non-zero and fits; otherwise
    // keep every byte that follows the entries.
    let len_strings = new_cache.len_strings as usize;
    let string_table = match string_table_bytes.get(..len_strings) {
        Some(declared) if len_strings > 0 => declared.to_vec(),
        _ => string_table_bytes.to_vec(),
    };

    let new_cache_complete = NewCache {
        nlibs: new_cache.nlibs,
        len_strings: new_cache.len_strings,
        flags: new_cache.flags,
        extension_offset: new_cache.extension_offset,
        entries: new_entries,
        extensions,
    };

    Ok(LdCache {
        old_format: None,
        new_format: Some(new_cache_complete),
        string_table,
        string_table_offset,
    })
}

/// Decodes the legacy header: the `ld.so-1.7.0` magic followed directly by a
/// little-endian `u32` library count.
///
/// The returned `OldCache` carries the count only; its `entries` vector is empty and is
/// expected to be filled by the caller.
fn parse_old_cache_header(input: &[u8]) -> IResult<&[u8], OldCache> {
    let (after_magic, _) = tag(OLD_CACHE_MAGIC)(input)?;

    le_u32(after_magic).map(|(rest, nlibs)| {
        let header = OldCache {
            nlibs,
            entries: Vec::new(),
        };
        (rest, header)
    })
}

fn parse_old_file_entry(input: &[u8]) -> IResult<&[u8], OldFileEntry> {
    let (input, (flags, key, value)) = (le_i32, le_u32, le_u32).parse(input)?;
    
    Ok((input, OldFileEntry { flags, key, value }))
}

fn parse_new_cache_header(input: &[u8]) -> IResult<&[u8], NewCache> {
    let (input, _magic) = tag(NEW_CACHE_MAGIC)(input)?;
    let (input, _version) = tag(NEW_CACHE_VERSION)(input)?;
    let (input, nlibs) = le_u32(input)?;
    let (input, len_strings) = le_u32(input)?;
    let (input, flags) = u8(input)?;
    let (input, _padding) = take(3usize)(input)?;
    let (input, extension_offset) = le_u32(input)?;
    let (input, _unused) = take(12usize)(input)?;
    
    Ok((input, NewCache {
        nlibs,
        len_strings,
        flags,
        extension_offset,
        entries: Vec::new(),
        extensions: None,
    }))
}

fn parse_new_file_entry(input: &[u8]) -> IResult<&[u8], NewFileEntry> {
    let (input, (flags, key, value, osversion_unused, hwcap)) = 
        (le_i32, le_u32, le_u32, le_u32, le_u64).parse(input)?;
    
    Ok((input, NewFileEntry {
        flags,
        key,
        value,
        osversion_unused,
        hwcap,
    }))
}

fn parse_extension_directory(input: &[u8]) -> IResult<&[u8], ExtensionDirectory> {
    let (input, magic) = le_u32(input)?;
    if magic != EXTENSION_MAGIC {
        return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag)));
    }
    
    let (input, count) = le_u32(input)?;
    let (input, sections) = nom::multi::count(parse_extension_section, count as usize).parse(input)?;
    
    Ok((input, ExtensionDirectory { count, sections }))
}

fn parse_extension_section(input: &[u8]) -> IResult<&[u8], ExtensionSection> {
    let (input, (tag, flags, offset, size)) = (le_u32, le_u32, le_u32, le_u32).parse(input)?;
    
    Ok((input, ExtensionSection { tag, flags, offset, size }))
}

/// Rounds `offset` up to the next multiple of the host's pointer size
/// (`size_of::<usize>()`); offsets that are already aligned are returned unchanged.
fn align_to_ptr_size(offset: usize) -> usize {
    let alignment = std::mem::size_of::<usize>();
    match offset % alignment {
        0 => offset,
        remainder => offset + (alignment - remainder),
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for the individual record parsers and for the `parse_ld_cache`
    //! entry point. All fixtures are built in memory; no cache file is read from disk.

    use super::*;

    /// Builds a 48-byte new-format header with the given counts and no extension directory.
    fn new_format_header(nlibs: u32, len_strings: u32) -> Vec<u8> {
        let mut data = NEW_CACHE_MAGIC.to_vec();
        data.extend_from_slice(NEW_CACHE_VERSION);
        data.extend_from_slice(&nlibs.to_le_bytes());
        data.extend_from_slice(&len_strings.to_le_bytes());
        data.extend_from_slice(&[2, 0, 0, 0]); // flags byte + 3 padding bytes
        data.extend_from_slice(&0u32.to_le_bytes()); // extension_offset = 0
        data.extend_from_slice(&[0u8; 12]); // unused
        data
    }

    #[test]
    fn test_parse_old_file_entry() {
        let data = [
            1, 0, 0, 0,       // flags = 1
            0x10, 0, 0, 0,    // key = 16
            0x20, 0, 0, 0,    // value = 32
        ];

        let (remaining, entry) = parse_old_file_entry(&data).unwrap();
        assert_eq!(remaining.len(), 0);
        assert_eq!(entry.flags, 1);
        assert_eq!(entry.key, 16);
        assert_eq!(entry.value, 32);
    }

    #[test]
    fn test_parse_new_file_entry() {
        let data = [
            1, 0, 0, 0,                           // flags = 1
            0x10, 0, 0, 0,                        // key = 16
            0x20, 0, 0, 0,                        // value = 32
            0, 0, 0, 0,                           // osversion_unused = 0
            0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0,   // hwcap = 0x12345678
        ];

        let (remaining, entry) = parse_new_file_entry(&data).unwrap();
        assert_eq!(remaining.len(), 0);
        assert_eq!(entry.flags, 1);
        assert_eq!(entry.key, 16);
        assert_eq!(entry.value, 32);
        assert_eq!(entry.osversion_unused, 0);
        assert_eq!(entry.hwcap, 0x1234_5678);
    }

    #[test]
    fn test_parse_extension_section() {
        let data = [
            1, 0, 0, 0,     // tag = 1
            0, 0, 0, 0,     // flags = 0
            0x40, 0, 0, 0,  // offset = 64
            0x20, 0, 0, 0,  // size = 32
        ];

        let (remaining, section) = parse_extension_section(&data).unwrap();
        assert_eq!(remaining.len(), 0);
        assert_eq!(section.tag, 1);
        assert_eq!(section.flags, 0);
        assert_eq!(section.offset, 64);
        assert_eq!(section.size, 32);
    }

    #[test]
    fn test_parse_extension_directory() {
        let mut data = EXTENSION_MAGIC.to_le_bytes().to_vec();
        data.extend_from_slice(&1u32.to_le_bytes()); // count = 1
        data.extend_from_slice(&[
            1, 0, 0, 0,     // tag = 1
            0, 0, 0, 0,     // flags = 0
            0x40, 0, 0, 0,  // offset = 64
            0x20, 0, 0, 0,  // size = 32
        ]);

        let (remaining, directory) = parse_extension_directory(&data).unwrap();
        assert_eq!(remaining.len(), 0);
        assert_eq!(directory.count, 1);
        assert_eq!(directory.sections.len(), 1);
        assert_eq!(directory.sections[0].tag, 1);
        assert_eq!(directory.sections[0].offset, 64);
        assert_eq!(directory.sections[0].size, 32);
    }

    #[test]
    fn test_parse_extension_directory_rejects_bad_magic() {
        assert!(parse_extension_directory(&[0u8; 8]).is_err());
    }

    #[test]
    fn test_align_to_ptr_size() {
        // Expressed in terms of the host pointer size so the assertions hold on
        // 32-bit and 64-bit targets alike.
        let ptr_size = std::mem::size_of::<usize>();
        assert_eq!(align_to_ptr_size(0), 0);
        assert_eq!(align_to_ptr_size(1), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size - 1), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size), ptr_size);
        assert_eq!(align_to_ptr_size(ptr_size + 1), 2 * ptr_size);
    }

    #[test]
    fn test_old_cache_magic() {
        let mut data = OLD_CACHE_MAGIC.to_vec();
        data.extend_from_slice(&[2, 0, 0, 0]); // nlibs = 2

        let (remaining, cache) = parse_old_cache_header(&data).unwrap();
        assert_eq!(cache.nlibs, 2);
        assert_eq!(remaining.len(), 0);
    }

    #[test]
    fn test_new_cache_magic() {
        let mut data = NEW_CACHE_MAGIC.to_vec();
        data.extend_from_slice(NEW_CACHE_VERSION);
        data.extend_from_slice(&[
            2, 0, 0, 0,    // nlibs = 2
            100, 0, 0, 0,  // len_strings = 100
            2,             // flags = 2 (little endian)
            0, 0, 0,       // padding
            0, 0, 0, 0,    // extension_offset = 0
            0, 0, 0, 0,    // unused[0]
            0, 0, 0, 0,    // unused[1]
            0, 0, 0, 0,    // unused[2]
        ]);

        let (remaining, cache) = parse_new_cache_header(&data).unwrap();
        assert_eq!(cache.nlibs, 2);
        assert_eq!(cache.len_strings, 100);
        assert_eq!(cache.flags, 2);
        assert_eq!(cache.extension_offset, 0);
        assert_eq!(remaining.len(), 0);
    }

    #[test]
    fn test_parse_ld_cache_rejects_short_input() {
        assert!(matches!(
            parse_ld_cache(&[0u8; 4]),
            Err(CacheError::TruncatedFile)
        ));
    }

    #[test]
    fn test_parse_ld_cache_rejects_unknown_magic() {
        assert!(matches!(
            parse_ld_cache(&[0u8; 32]),
            Err(CacheError::InvalidMagic)
        ));
    }

    #[test]
    fn test_parse_ld_cache_rejects_missing_entries() {
        // The header announces five entries but no entry bytes follow it.
        let data = new_format_header(5, 0);
        assert!(matches!(
            parse_ld_cache(&data),
            Err(CacheError::TruncatedFile)
        ));
    }

    #[test]
    fn test_parse_ld_cache_new_format_only() {
        let strings: &[u8] = b"libc.so.6\0/lib/libc.so.6\0";

        let mut data = new_format_header(1, strings.len() as u32);
        data.extend_from_slice(&1i32.to_le_bytes());  // flags
        data.extend_from_slice(&72u32.to_le_bytes()); // key
        data.extend_from_slice(&82u32.to_le_bytes()); // value
        data.extend_from_slice(&0u32.to_le_bytes());  // osversion_unused
        data.extend_from_slice(&0u64.to_le_bytes());  // hwcap
        let strings_start = data.len();
        data.extend_from_slice(strings);

        let cache = match parse_ld_cache(&data) {
            Ok(cache) => cache,
            Err(_) => panic!("a well-formed new-format cache must parse"),
        };

        assert!(cache.old_format.is_none());
        let new_format = cache
            .new_format
            .as_ref()
            .expect("new-format data must be present");
        assert_eq!(new_format.nlibs, 1);
        assert_eq!(new_format.entries.len(), 1);
        assert_eq!(new_format.entries[0].key, 72);
        assert_eq!(new_format.entries[0].value, 82);
        assert!(new_format.extensions.is_none());
        assert_eq!(cache.string_table_offset, strings_start);
        assert_eq!(&cache.string_table[..], strings);
    }
}