Skip to main content

memf_linux/
preload_scanner.rs

1//! Library prevalence analysis for LD_PRELOAD rootkit detection.
2
/// A shared library and how widely it is mapped across processes — the core
/// signal for LD_PRELOAD-style rootkit detection: a malicious preload is injected
/// into *every* process, so an unusually high prevalence is suspicious.
///
/// Values of this type are produced by [`find_globally_loaded_libraries`], which
/// fills in the three prevalence fields from its `proc_maps` input and leaves
/// `elf_report` empty.
#[derive(Debug)]
pub struct GloballyLoadedLibrary {
    /// Filesystem path of the shared object (e.g. `/usr/lib/evil.so`), copied
    /// verbatim from the per-process mapping list.
    pub path: String,
    /// Number of inspected PIDs that have this library mapped. A process that
    /// lists the same path several times is counted once.
    pub present_in_pid_count: usize,
    /// Total number of PIDs inspected (the prevalence denominator).
    pub total_pids_checked: usize,
    /// Fraction of inspected processes mapping this library, in `[0.0, 1.0]`:
    /// `present_in_pid_count / total_pids_checked`.
    pub prevalence: f64,
    /// Optional ELF capability analysis of the library, when available.
    /// [`find_globally_loaded_libraries`] always sets this to `None`.
    // NOTE(review): presumably filled in by a later analysis pass — confirm with callers.
    pub elf_report: Option<crate::elf_analysis::ElfCapabilityReport>,
}
19
/// One row of Volatility's `linux.elfs` output — a memory-mapped ELF image in a
/// process, used as an alternative prevalence source to `/proc/<pid>/maps`.
///
/// The type is plain data: it derives equality and hashing so rows can be
/// compared in assertions and deduplicated in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct VolatilityElfEntry {
    /// Owning process id.
    pub pid: u32,
    /// Owning process name.
    pub process_name: String,
    /// Mapping start virtual address.
    pub start: u64,
    /// Mapping end virtual address.
    pub end: u64,
    /// Filesystem path of the mapped ELF image.
    pub path: String,
}
35
36/// Rank shared objects by how many inspected processes map them, keeping only
37/// `.so` libraries whose prevalence meets `threshold` — the LD_PRELOAD candidates.
38pub fn find_globally_loaded_libraries(
39    proc_maps: &[(u32, Vec<String>)],
40    threshold: f64,
41) -> Vec<GloballyLoadedLibrary> {
42    use std::collections::HashMap;
43    let total = proc_maps.len();
44    if total == 0 {
45        return vec![];
46    }
47    let mut counts: HashMap<String, usize> = HashMap::new();
48    for (_, paths) in proc_maps {
49        let unique: std::collections::HashSet<&str> = paths.iter().map(String::as_str).collect();
50        for p in unique {
51            *counts.entry(p.to_string()).or_default() += 1;
52        }
53    }
54    counts
55        .into_iter()
56        .filter(|(path, count)| {
57            // Case-sensitive by design: Linux filesystems are case-sensitive and
58            // shared objects are lowercase `.so` — a case-insensitive match would
59            // mis-classify unrelated `.SO` paths as libraries.
60            #[allow(clippy::case_sensitive_file_extension_comparisons)]
61            let is_so = path.ends_with(".so") || path.contains(".so.");
62            is_so && (*count as f64 / total as f64) >= threshold
63        })
64        .map(|(path, count)| {
65            let prevalence = count as f64 / total as f64;
66            GloballyLoadedLibrary {
67                path,
68                present_in_pid_count: count,
69                total_pids_checked: total,
70                prevalence,
71                elf_report: None,
72            }
73        })
74        .collect()
75}
76
77/// Parse Volatility `linux.elfs` TSV output into [`VolatilityElfEntry`] rows
78/// (skips the header and blank/comment lines; tolerant of short rows).
79pub fn parse_linux_elfs_tsv(content: &str) -> Vec<VolatilityElfEntry> {
80    content
81        .lines()
82        .skip(1)
83        .filter(|l| !l.trim().is_empty() && !l.starts_with('#'))
84        .filter_map(|line| {
85            let cols: Vec<&str> = line.splitn(5, '\t').collect();
86            if cols.len() < 5 {
87                return None;
88            }
89            Some(VolatilityElfEntry {
90                pid: cols[0].trim().parse().ok()?,
91                process_name: cols[1].trim().to_string(),
92                start: u64::from_str_radix(cols[2].trim().trim_start_matches("0x"), 16).ok()?,
93                end: u64::from_str_radix(cols[3].trim().trim_start_matches("0x"), 16).ok()?,
94                path: cols[4].trim().to_string(),
95            })
96        })
97        .collect()
98}
99
100/// Prevalence ranking from [`VolatilityElfEntry`] rows: each path's fraction of
101/// distinct PIDs mapping it, filtered by `threshold`. Returns `(path, prevalence)`.
102pub fn find_globally_loaded_from_elfs(
103    entries: &[VolatilityElfEntry],
104    threshold: f64,
105) -> Vec<(String, f64)> {
106    use std::collections::HashMap;
107    let mut pid_sets: HashMap<&str, std::collections::HashSet<u32>> = HashMap::new();
108    for e in entries {
109        pid_sets.entry(&e.path).or_default().insert(e.pid);
110    }
111    let total_pids: std::collections::HashSet<u32> = entries.iter().map(|e| e.pid).collect();
112    let n = total_pids.len() as f64;
113    if n == 0.0 {
114        return vec![];
115    }
116    let mut result: Vec<(String, f64)> = pid_sets
117        .into_iter()
118        .filter_map(|(path, pids)| {
119            let prevalence = pids.len() as f64 / n;
120            if prevalence >= threshold {
121                Some((path.to_string(), prevalence))
122            } else {
123                None
124            }
125        })
126        .collect();
127    result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
128    result
129}
130
/// Unit tests for the prevalence scanners and the `linux.elfs` TSV parser.
///
/// Assertions check exact result sets rather than "non-empty" / "at least N",
/// so an over- or under-inclusive filter is caught. Where the order of results
/// is not the subject of the test, paths are sorted before comparison.
#[cfg(test)]
mod tests {
    use super::*;

    /// Header line used by the TSV fixtures.
    const HEADER: &str = "PID\tProcess\tStart\tEnd\tFile\n";

    /// Build one `(pid, mapped paths)` row for `find_globally_loaded_libraries`.
    fn proc_map(pid: u32, paths: &[&str]) -> (u32, Vec<String>) {
        (pid, paths.iter().map(|p| (*p).to_string()).collect())
    }

    /// Build a `VolatilityElfEntry` with placeholder name and addresses; only
    /// `pid` and `path` matter to the prevalence computation.
    fn elf_entry(pid: u32, path: &str) -> VolatilityElfEntry {
        VolatilityElfEntry {
            pid,
            process_name: String::from("proc"),
            start: 0,
            end: 0,
            path: path.to_string(),
        }
    }

    /// Paths of `libs`, sorted, for order-independent comparison.
    fn sorted_paths(libs: &[GloballyLoadedLibrary]) -> Vec<&str> {
        let mut paths: Vec<&str> = libs.iter().map(|l| l.path.as_str()).collect();
        paths.sort_unstable();
        paths
    }

    #[test]
    fn find_globally_loaded_empty_input_returns_empty() {
        let result = find_globally_loaded_libraries(&[], 0.9);
        assert!(result.is_empty());
    }

    #[test]
    fn find_globally_loaded_library_in_all_pids_found() {
        let maps = vec![
            proc_map(1, &["/lib/evil.so", "/lib/libc.so.6"]),
            proc_map(2, &["/lib/evil.so", "/lib/libc.so.6"]),
            proc_map(3, &["/lib/evil.so", "/lib/libpthread.so.0"]),
        ];
        let result = find_globally_loaded_libraries(&maps, 1.0);
        // libc (2/3) and libpthread (1/3) must not pass a 100% threshold.
        assert_eq!(
            sorted_paths(&result),
            vec!["/lib/evil.so"],
            "only evil.so is present in all pids"
        );
        assert_eq!(result[0].present_in_pid_count, 3);
        assert_eq!(result[0].total_pids_checked, 3);
        assert!((result[0].prevalence - 1.0).abs() < f64::EPSILON);
        assert!(result[0].elf_report.is_none());
    }

    #[test]
    fn find_globally_loaded_library_in_half_pids_below_threshold() {
        let maps = vec![
            proc_map(1, &["/lib/half.so"]),
            proc_map(2, &["/lib/other.so"]),
        ];
        // 50% prevalence should be excluded at threshold=0.9
        let result = find_globally_loaded_libraries(&maps, 0.9);
        assert!(
            result.is_empty(),
            "libraries at 50% should not pass 90% threshold"
        );
    }

    #[test]
    fn find_globally_loaded_respects_threshold_parameter() {
        let maps = vec![
            proc_map(1, &["/lib/half.so"]),
            proc_map(2, &["/lib/half.so"]),
            proc_map(3, &["/lib/other.so"]),
            proc_map(4, &["/lib/other.so"]),
        ];
        // 50% threshold → both should appear (each is in 50% of pids)
        let result = find_globally_loaded_libraries(&maps, 0.5);
        assert_eq!(
            sorted_paths(&result),
            vec!["/lib/half.so", "/lib/other.so"],
            "at 50% threshold, libraries at 50% prevalence should appear"
        );
        let result_75 = find_globally_loaded_libraries(&maps, 0.75);
        assert!(
            result_75.is_empty(),
            "at 75% threshold, 50% libraries should be excluded"
        );
    }

    #[test]
    fn parse_linux_elfs_tsv_empty_returns_empty() {
        assert!(parse_linux_elfs_tsv("").is_empty());
    }

    #[test]
    fn parse_linux_elfs_tsv_parses_pid_and_path() {
        let tsv = format!(
            "{}1234\tbash\t0x7f000000\t0x7f001000\t/lib/evil.so\n",
            HEADER
        );
        let entries = parse_linux_elfs_tsv(&tsv);
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].pid, 1234);
        assert_eq!(entries[0].path, "/lib/evil.so");
        assert_eq!(entries[0].process_name, "bash");
        assert_eq!(entries[0].start, 0x7f00_0000);
        assert_eq!(entries[0].end, 0x7f00_1000);
    }

    #[test]
    fn parse_linux_elfs_tsv_skips_header_line() {
        let entries = parse_linux_elfs_tsv(HEADER);
        assert!(
            entries.is_empty(),
            "header-only TSV should parse to empty vec"
        );
    }

    #[test]
    fn parse_linux_elfs_tsv_handles_hex_addresses() {
        let tsv = format!("{}42\tinit\t0xdeadbeef\t0xdeadc0de\t/lib/x.so\n", HEADER);
        let entries = parse_linux_elfs_tsv(&tsv);
        // Guard the indexing below so a parse failure reports clearly.
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].start, 0xdead_beef);
        assert_eq!(entries[0].end, 0xdead_c0de);
    }

    #[test]
    fn find_globally_loaded_from_elfs_library_in_all_pids() {
        let entries = vec![
            elf_entry(1, "/lib/evil.so"),
            elf_entry(2, "/lib/evil.so"),
            elf_entry(3, "/lib/evil.so"),
        ];
        let result = find_globally_loaded_from_elfs(&entries, 1.0);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "/lib/evil.so");
        assert!((result[0].1 - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn find_globally_loaded_from_elfs_sorted_by_prevalence() {
        let entries = vec![
            elf_entry(1, "/lib/always.so"),
            elf_entry(2, "/lib/always.so"),
            elf_entry(1, "/lib/sometimes.so"),
        ];
        let result = find_globally_loaded_from_elfs(&entries, 0.1);
        assert_eq!(
            result.len(),
            2,
            "both libraries should appear at 10% threshold"
        );
        // Sorted descending: always.so (100%) before sometimes.so (50%)
        assert_eq!(result[0].0, "/lib/always.so");
        assert!((result[0].1 - 1.0).abs() < f64::EPSILON);
        assert_eq!(result[1].0, "/lib/sometimes.so");
        assert!((result[1].1 - 0.5).abs() < f64::EPSILON);
    }
}
304}