Skip to main content

ctcb_strip/
lib.rs

1//! Strip non-essential files from LLVM/Clang toolchain distributions.
2//!
3//! This crate removes unnecessary files from downloaded LLVM distributions
4//! and optionally strips debug symbols from binaries to minimize package size.
5
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use anyhow::{Context, Result};
11use ctcb_core::{Platform, Target};
12use walkdir::WalkDir;
13
/// Base names (no platform extension) of the tools that survive stripping.
///
/// [`is_essential_binary`] compares a normalised file name against this list.
pub const ESSENTIAL_BINARIES: &[&str] = &[
    // Compiler drivers
    "clang",
    "clang++",
    "clang-cl",
    "clang-cpp",
    // Linker front ends
    "lld",
    "lld-link",
    "ld.lld",
    "ld64.lld",
    "wasm-ld",
    // Object-file / archive tooling
    "llvm-ar",
    "llvm-nm",
    "llvm-objdump",
    "llvm-objcopy",
    "llvm-ranlib",
    "llvm-strip",
    "llvm-readelf",
    "llvm-readobj",
    "llvm-dlltool",
    "llvm-lib",
    // Miscellaneous helpers
    "llvm-as",
    "llvm-dis",
    "llvm-symbolizer",
    "llvm-config",
];
44
/// Directories that are never part of the stripped output.
///
/// Kept purely as a reference list: the copy-based approach in this file only
/// copies what is needed, so these paths are skipped implicitly and nothing
/// reads this constant.
#[allow(dead_code)] // reference list only; see the doc comment above
const REMOVE_DIRS: &[&str] = &[
    // Documentation
    "share/doc",
    "share/man",
    "docs",
    // Auxiliary tooling under share/
    "share/clang",
    "share/opt-viewer",
    "share/scan-build",
    "share/scan-view",
    // Other top-level extras
    "python_packages",
    "libexec",
];
59
/// File-name suffixes that mark a file directly inside `lib/` or `lib64/` as
/// removable (static archives, import libraries and CMake scripts).
const REMOVE_LIB_EXTENSIONS: &[&str] = &[
    ".a",
    ".lib",
    ".cmake",
];
62
63/// Configuration for stripping.
64pub struct StripConfig {
65    pub target: Target,
66    pub keep_headers: bool,
67    pub strip_debug: bool,
68    pub verbose: bool,
69}
70
/// Summary of a stripping run, as returned by [`strip_llvm`].
///
/// The type is plain data, so it derives the usual value-type traits; this
/// lets callers log it with `{:?}` and compare it in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct StripStats {
    /// Total size in bytes of the regular files under the LLVM root before stripping.
    pub original_size: u64,
    /// Total size in bytes of the regular files under the output directory afterwards.
    pub final_size: u64,
    /// Number of entries copied into the output.
    pub files_kept: u64,
    /// Number of entries deliberately left out of the output.
    pub files_removed: u64,
}
78
79/// Check if a binary name (with or without extension) is in the essential list.
80pub fn is_essential_binary(name: &str) -> bool {
81    let base = name
82        .strip_suffix(".exe")
83        .or_else(|| name.strip_suffix(".dll"))
84        .or_else(|| name.strip_suffix(".so"))
85        .or_else(|| name.strip_suffix(".dylib"))
86        .unwrap_or(name);
87    // Also handle versioned clang like "clang-19"
88    let base = if base.starts_with("clang-")
89        && base[6..].chars().all(|c| c.is_ascii_digit() || c == '.')
90        && !base[6..].is_empty()
91    {
92        "clang"
93    } else {
94        base
95    };
96    ESSENTIAL_BINARIES.contains(&base)
97}
98
/// Find the LLVM root directory (the one containing a `bin/` subdirectory).
///
/// `source_dir` itself is checked first, then its immediate children.
/// `bin` must be a directory; a regular file that happens to be called `bin`
/// does not qualify. When several children qualify, the lexicographically
/// smallest path is returned so the result does not depend on the order in
/// which the operating system lists the directory.
///
/// Returns `None` when no candidate exists or `source_dir` cannot be read.
pub fn find_llvm_root(source_dir: &Path) -> Option<PathBuf> {
    if source_dir.join("bin").is_dir() {
        return Some(source_dir.to_path_buf());
    }

    // Check one level of subdirectories; unreadable entries are skipped.
    fs::read_dir(source_dir)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|candidate| candidate.join("bin").is_dir())
        .min()
}
114
115/// Calculate total size of a directory recursively.
116fn dir_size(path: &Path) -> u64 {
117    WalkDir::new(path)
118        .into_iter()
119        .filter_map(|e| e.ok())
120        .filter(|e| e.file_type().is_file())
121        .map(|e| e.metadata().map(|m| m.len()).unwrap_or(0))
122        .sum()
123}
124
125/// Copy essential files from LLVM root to output directory.
126fn copy_essential_files(
127    llvm_root: &Path,
128    output_dir: &Path,
129    config: &StripConfig,
130) -> Result<(u64, u64)> {
131    let mut kept = 0u64;
132    let mut removed = 0u64;
133
134    fs::create_dir_all(output_dir)?;
135
136    // 1. Copy bin/ directory (filtered to essential binaries only)
137    let src_bin = llvm_root.join("bin");
138    if src_bin.exists() {
139        let dst_bin = output_dir.join("bin");
140        fs::create_dir_all(&dst_bin)?;
141
142        for entry in fs::read_dir(&src_bin)? {
143            let entry = entry?;
144            if entry.file_type()?.is_file() {
145                if is_essential_binary(&entry.file_name().to_string_lossy()) {
146                    fs::copy(entry.path(), dst_bin.join(entry.file_name()))?;
147                    kept += 1;
148                    if config.verbose {
149                        println!("  Keeping: {}", entry.file_name().to_string_lossy());
150                    }
151                } else {
152                    removed += 1;
153                    if config.verbose {
154                        println!("  Removing: {}", entry.file_name().to_string_lossy());
155                    }
156                }
157            }
158        }
159    }
160
161    // 2. Copy lib/ directory (keep clang runtime, dynamic libs; remove static libs and cmake)
162    for lib_dir_name in &["lib", "lib64"] {
163        let src_lib = llvm_root.join(lib_dir_name);
164        if !src_lib.exists() {
165            continue;
166        }
167        let dst_lib = output_dir.join(lib_dir_name);
168        fs::create_dir_all(&dst_lib)?;
169
170        for entry in fs::read_dir(&src_lib)? {
171            let entry = entry?;
172            let name = entry.file_name().to_string_lossy().to_string();
173
174            if entry.file_type()?.is_dir() {
175                // Keep the clang runtime directory
176                if name == "clang" {
177                    copy_dir_recursive(&entry.path(), &dst_lib.join(&name))?;
178                    kept += 1;
179                }
180                // Skip other directories
181            } else if entry.file_type()?.is_file() {
182                let is_dynamic = name.ends_with(".so")
183                    || name.contains(".so.")
184                    || name.ends_with(".dll")
185                    || name.ends_with(".dylib");
186                let is_removable = REMOVE_LIB_EXTENSIONS.iter().any(|ext| name.ends_with(ext))
187                    || name == "CMakeLists.txt";
188
189                if is_dynamic {
190                    fs::copy(entry.path(), dst_lib.join(&name))?;
191                    kept += 1;
192                } else if is_removable {
193                    removed += 1;
194                } else {
195                    // Keep other files
196                    fs::copy(entry.path(), dst_lib.join(&name))?;
197                    kept += 1;
198                }
199            }
200        }
201    }
202
203    // 3. Copy include/ only if keep_headers is true
204    if config.keep_headers {
205        let src_include = llvm_root.join("include");
206        if src_include.exists() {
207            copy_dir_recursive(&src_include, &output_dir.join("include"))?;
208        }
209    }
210
211    // 4. Copy LICENSE/README/NOTICE files
212    for entry in fs::read_dir(llvm_root)? {
213        let entry = entry?;
214        let name = entry.file_name().to_string_lossy().to_string();
215        if entry.file_type()?.is_file()
216            && (name.starts_with("LICENSE")
217                || name.starts_with("README")
218                || name.starts_with("NOTICE"))
219        {
220            fs::copy(entry.path(), output_dir.join(&name))?;
221        }
222    }
223
224    Ok((kept, removed))
225}
226
227/// Recursively copy a directory.
228fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> {
229    fs::create_dir_all(dst)?;
230    for entry in WalkDir::new(src) {
231        let entry = entry?;
232        let relative = entry
233            .path()
234            .strip_prefix(src)
235            .context("failed to strip prefix during recursive copy")?;
236        let target = dst.join(relative);
237        if entry.file_type().is_dir() {
238            fs::create_dir_all(&target)?;
239        } else {
240            if let Some(parent) = target.parent() {
241                fs::create_dir_all(parent)?;
242            }
243            fs::copy(entry.path(), &target)?;
244        }
245    }
246    Ok(())
247}
248
249/// Strip debug symbols from binaries in the output directory.
250///
251/// On Linux: uses `strip --strip-all` or `llvm-strip --strip-all`
252/// On Windows: uses `llvm-strip.exe --strip-all`
253/// On macOS: uses `strip -x` or `llvm-strip --strip-all`
254fn strip_debug_symbols(output_dir: &Path, config: &StripConfig) -> Result<u64> {
255    if !config.strip_debug {
256        return Ok(0);
257    }
258
259    let bin_dir = output_dir.join("bin");
260    if !bin_dir.exists() {
261        return Ok(0);
262    }
263
264    let mut stripped = 0u64;
265
266    // Try to find llvm-strip in the output
267    let llvm_strip = if config.target.platform == Platform::Win {
268        bin_dir.join("llvm-strip.exe")
269    } else {
270        bin_dir.join("llvm-strip")
271    };
272
273    for entry in fs::read_dir(&bin_dir)? {
274        let entry = entry?;
275        if !entry.file_type()?.is_file() {
276            continue;
277        }
278        let path = entry.path();
279        let name = entry.file_name().to_string_lossy().to_string();
280
281        // Determine if this file should be stripped
282        let should_strip = match config.target.platform {
283            Platform::Win => name.ends_with(".exe") || name.ends_with(".dll"),
284            _ => {
285                // On Unix: known extensions or extensionless files (executables)
286                name.ends_with(".so") || name.ends_with(".dylib") || !name.contains('.')
287            }
288        };
289
290        if !should_strip {
291            continue;
292        }
293
294        // Don't strip llvm-strip itself while using it
295        if path == llvm_strip {
296            continue;
297        }
298
299        let cmd_result = if llvm_strip.exists() {
300            Command::new(&llvm_strip)
301                .args(["--strip-all", &path.to_string_lossy()])
302                .output()
303        } else if config.target.platform != Platform::Win {
304            Command::new("strip")
305                .args(["--strip-all", &path.to_string_lossy()])
306                .output()
307        } else {
308            continue; // No strip tool available on Windows without llvm-strip
309        };
310
311        match cmd_result {
312            Ok(output) if output.status.success() => {
313                stripped += 1;
314                if config.verbose {
315                    println!("  Stripped: {name}");
316                }
317            }
318            Ok(output) => {
319                if config.verbose {
320                    println!(
321                        "  Failed to strip {name}: {}",
322                        String::from_utf8_lossy(&output.stderr)
323                    );
324                }
325            }
326            Err(e) => {
327                if config.verbose {
328                    println!("  Error stripping {name}: {e}");
329                }
330            }
331        }
332    }
333
334    Ok(stripped)
335}
336
337/// Main entry point: strip an LLVM distribution.
338///
339/// Finds the LLVM root in `source_dir`, copies only essential files to
340/// `output_dir`, and optionally strips debug symbols from binaries.
341pub fn strip_llvm(
342    source_dir: &Path,
343    output_dir: &Path,
344    config: &StripConfig,
345) -> Result<StripStats> {
346    let llvm_root = find_llvm_root(source_dir).ok_or_else(|| {
347        anyhow::anyhow!(
348            "Could not find LLVM root (directory with bin/) in {}",
349            source_dir.display()
350        )
351    })?;
352
353    println!("Found LLVM root: {}", llvm_root.display());
354
355    let original_size = dir_size(&llvm_root);
356    println!("Original size: {}", ctcb_core::format_size(original_size));
357
358    println!("Copying essential files...");
359    let (kept, removed) = copy_essential_files(&llvm_root, output_dir, config)?;
360
361    if config.strip_debug {
362        println!("Stripping debug symbols...");
363        let stripped_count = strip_debug_symbols(output_dir, config)?;
364        println!("Stripped {stripped_count} binaries");
365    }
366
367    let final_size = dir_size(output_dir);
368    let savings = original_size.saturating_sub(final_size);
369    let pct = if original_size > 0 {
370        savings as f64 / original_size as f64 * 100.0
371    } else {
372        0.0
373    };
374
375    ctcb_core::print_section("Statistics");
376    println!(
377        "Original size:  {:>10}",
378        ctcb_core::format_size(original_size)
379    );
380    println!("Final size:     {:>10}", ctcb_core::format_size(final_size));
381    println!(
382        "Saved:          {:>10} ({:.1}%)",
383        ctcb_core::format_size(savings),
384        pct
385    );
386    println!("Files kept:     {:>10}", kept);
387    println!("Files removed:  {:>10}", removed);
388
389    Ok(StripStats {
390        original_size,
391        final_size,
392        files_kept: kept,
393        files_removed: removed,
394    })
395}
396
397// ===========================================================================
398// Tests
399// ===========================================================================
400
#[cfg(test)]
mod tests {
    use super::*;
    use ctcb_core::Arch;
    use tempfile::TempDir;

    /// Populate `dir` with a miniature Windows-style LLVM layout: three
    /// essential and two non-essential tools, one static and one dynamic
    /// library, and a license file.
    fn create_fake_llvm(dir: &Path) {
        let bin = dir.join("bin");
        fs::create_dir_all(&bin).unwrap();
        let tools = [
            // Essential
            ("clang.exe", "fake clang"),
            ("lld.exe", "fake lld"),
            ("llvm-ar.exe", "fake ar"),
            // Non-essential
            ("bugpoint.exe", "fake bugpoint"),
            ("llvm-reduce.exe", "fake reduce"),
        ];
        for &(file, contents) in &tools {
            fs::write(bin.join(file), contents).unwrap();
        }

        let lib = dir.join("lib");
        fs::create_dir_all(&lib).unwrap();
        let libraries = [
            ("libclang.a", "static lib"),
            ("libclang.dll", "dynamic lib"),
        ];
        for &(file, contents) in &libraries {
            fs::write(lib.join(file), contents).unwrap();
        }

        fs::write(dir.join("LICENSE.TXT"), "license").unwrap();
    }

    #[test]
    fn test_is_essential_binary() {
        for name in &["clang.exe", "clang", "lld", "llvm-ar.exe"] {
            assert!(is_essential_binary(name));
        }
        for name in &["bugpoint.exe", "llvm-reduce"] {
            assert!(!is_essential_binary(name));
        }
    }

    #[test]
    fn test_is_essential_versioned_clang() {
        for name in &["clang-19", "clang-19.1"] {
            assert!(is_essential_binary(name));
        }
    }

    #[test]
    fn test_find_llvm_root_direct() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join("bin")).unwrap();
        assert_eq!(find_llvm_root(root), Some(root.to_path_buf()));
    }

    #[test]
    fn test_find_llvm_root_nested() {
        let tmp = TempDir::new().unwrap();
        fs::create_dir_all(tmp.path().join("llvm-19/bin")).unwrap();
        assert!(find_llvm_root(tmp.path()).is_some());
    }

    #[test]
    fn test_strip_llvm_keeps_essentials() {
        let tmp = TempDir::new().unwrap();
        let source = tmp.path().join("source");
        let output = tmp.path().join("output");
        fs::create_dir_all(&source).unwrap();
        create_fake_llvm(&source);

        let config = StripConfig {
            target: Target::new(Platform::Win, Arch::X86_64),
            keep_headers: false,
            strip_debug: false, // Don't try to run strip in tests
            verbose: false,
        };

        let stats = strip_llvm(&source, &output, &config).unwrap();

        // Essential binaries, the dynamic library and the license survive.
        let expected_present = [
            "bin/clang.exe",
            "bin/lld.exe",
            "bin/llvm-ar.exe",
            "lib/libclang.dll",
            "LICENSE.TXT",
        ];
        for relative in &expected_present {
            assert!(output.join(relative).exists());
        }

        // Non-essential tools and the static library are left behind.
        let expected_absent = [
            "bin/bugpoint.exe",
            "bin/llvm-reduce.exe",
            "lib/libclang.a",
        ];
        for relative in &expected_absent {
            assert!(!output.join(relative).exists());
        }

        assert!(stats.files_kept > 0);
        assert!(stats.files_removed > 0);
    }
}