substance/
lib.rs

1use owo_colors::OwoColorize;
2pub use types::*;
3
4use camino::{Utf8Path, Utf8PathBuf};
5use ignore::WalkBuilder;
6
7use std::collections::HashMap;
8use std::process::{Command, Stdio};
9use std::time::{Duration, Instant};
10
11use binfarce::ar;
12use log::{debug, error, info, trace, warn};
13
14use crate::cargo::{CargoMessage, TimingInfo};
15use crate::crate_name::StdHandling;
16use crate::env::{collect_rlib_paths, stdlibs_dir};
17use crate::errors::SubstanceError;
18use crate::llvm_ir::analyze_llvm_ir_from_target_dir;
19use crate::object::{collect_deps_symbols, collect_self_data};
20
21pub mod cargo;
22pub mod crate_name;
23pub mod env;
24pub mod errors;
25pub mod formatting;
26pub mod llvm_ir;
27pub mod object;
28pub mod reporting;
29pub mod types;
30
/// Drives a `cargo build` of one manifest and gathers the data used for analysis.
///
/// Create one with [`BuildRunner::for_manifest`], optionally append cargo flags with
/// [`BuildRunner::arg`], then call [`BuildRunner::run`].
pub struct BuildRunner {
    /// Manifest passed to cargo via `--manifest-path`.
    manifest_path: Utf8PathBuf,
    /// Directory passed to cargo via `--target-dir`; also searched for LLVM IR after the build.
    target_dir: Utf8PathBuf,

    /// Store the TempDir to ensure the temporary directory lives as long as the BuildRunner.
    /// `None` when the target directory was derived from `SUBSTANCE_TMP_DIR` instead.
    _temp_dir: Option<tempfile::TempDir>,

    /// Flags like `--bin blah`, or `--example bleh` etc.
    /// They are placed right after `cargo build`, before the analysis flags.
    additional_args: Vec<String>,
}
41
/// Result of a build run with all parsed data.
///
/// NOTE(review): nothing in this part of the file constructs this type; `BuildRunner::run`
/// returns a bare `BuildContext` — confirm where (or whether) it is still used.
pub struct BuildResult {
    /// Analysis context for the build.
    pub context: BuildContext,
    /// Timing records; presumably the ones parsed from cargo's output — TODO confirm.
    pub timing_data: Vec<TimingInfo>,
}
47
// Analysis comparison types

/// Differences between two analyses, grouped by file size, symbol and crate.
///
/// NOTE(review): the code that fills this in is not visible here; the "before"/"after"
/// reading below is taken from the field names of the nested types — confirm.
#[derive(Debug, Clone)]
pub struct AnalysisComparison {
    /// Whole-file and `.text` sizes for both sides of the comparison.
    pub file_size_diff: FileSizeDiff,
    /// One entry per symbol being compared.
    pub symbol_changes: Vec<SymbolChange>,
    /// One entry per crate being compared.
    pub crate_changes: Vec<CrateChange>,
}
55
/// Binary size figures for the two sides of a comparison.
#[derive(Debug, Clone)]
pub struct FileSizeDiff {
    /// Size of the whole file on the "before" side.
    pub file_size_before: ByteSize,
    /// Size of the whole file on the "after" side.
    pub file_size_after: ByteSize,
    /// Text size on the "before" side (presumably the `.text` section — TODO confirm).
    pub text_size_before: ByteSize,
    /// Text size on the "after" side (presumably the `.text` section — TODO confirm).
    pub text_size_after: ByteSize,
}
63
/// Size of a single symbol on both sides of a comparison.
#[derive(Debug, Clone)]
pub struct SymbolChange {
    /// Symbol name (presumably the raw/mangled form, given the separate `demangled` field — TODO confirm).
    pub name: String,
    /// Demangled form of the symbol name.
    pub demangled: String,
    /// Size on the "before" side; presumably `None` when the symbol is absent there — TODO confirm.
    pub size_before: Option<u64>,
    /// Size on the "after" side; presumably `None` when the symbol is absent there — TODO confirm.
    pub size_after: Option<u64>,
}
71
/// Size attributed to a single crate on both sides of a comparison.
#[derive(Debug, Clone)]
pub struct CrateChange {
    /// Crate name.
    pub name: String,
    /// Size on the "before" side; presumably `None` when the crate is absent there — TODO confirm.
    pub size_before: Option<u64>,
    /// Size on the "after" side; presumably `None` when the crate is absent there — TODO confirm.
    pub size_after: Option<u64>,
}
78
79impl BuildRunner {
80    /// Create a new BuildRunner instance.
81    pub fn for_manifest(manifest_path: impl Into<Utf8PathBuf>) -> Self {
82        use std::env;
83
84        // Check if SUBSTANCE_TMP_DIR is set
85        if let Ok(dir) = env::var("SUBSTANCE_TMP_DIR") {
86            let manifest_path: Utf8PathBuf = manifest_path.into();
87
88            // Mix in the hash of the manifest_path to the target_dir for uniqueness
89            use std::collections::hash_map::DefaultHasher;
90            use std::hash::{Hash, Hasher};
91
92            let mut hasher = DefaultHasher::new();
93            manifest_path.hash(&mut hasher);
94            let hash_val = hasher.finish();
95
96            let base_target_dir = Utf8PathBuf::from(dir);
97            let target_dir = base_target_dir.join(format!("{hash_val:016x}"));
98
99            info!(
100                "Using SUBSTANCE_TMP_DIR as target directory: {target_dir} (mixed with manifest hash)"
101            );
102            Self {
103                manifest_path,
104                target_dir,
105                _temp_dir: None,
106                additional_args: Vec::new(),
107            }
108        } else {
109            // Generate a temporary directory for the target directory.
110            let tmp_dir = tempfile::Builder::new()
111                .prefix("substance-build-tmp")
112                .tempdir()
113                .expect("Failed to create temporary build directory");
114            let target_dir = Utf8PathBuf::from_path_buf(tmp_dir.path().to_path_buf())
115                .expect("Temporary target_dir is not valid UTF-8");
116
117            // Store the TempDir so it is kept alive as long as BuildRunner lives.
118            Self {
119                manifest_path: manifest_path.into(),
120                target_dir,
121                _temp_dir: Some(tmp_dir),
122                additional_args: Vec::new(),
123            }
124        }
125    }
126
127    /// Add an additional argument to the cargo build command.
128    pub fn arg<T: Into<String>>(mut self, arg: T) -> Self {
129        self.additional_args.push(arg.into());
130        self
131    }
132
133    pub fn run(&self) -> Result<BuildContext, SubstanceError> {
134        // Ensure manifest exists
135        if !self.manifest_path.exists() {
136            error!("Manifest file not found: {:?}", self.manifest_path);
137            return Err(SubstanceError::OpenFailed(self.manifest_path.clone()));
138        }
139
140        info!("Building project from manifest: {:?}", self.manifest_path);
141        info!("Target directory: {:?}", self.target_dir);
142
143        let mut cmd = self.build_command();
144
145        // Execute the build and forward stdout/stderr to the parent's stdout/stderr as it happens,
146        // using two threads, but only collect JSON lines from stdout.
147
148        let before_build = Instant::now();
149
150        let mut cmd = cmd
151            .stdout(Stdio::piped())
152            .stderr(Stdio::piped())
153            .spawn()
154            .map_err(|e| {
155                error!("Failed to execute cargo: {e}");
156                SubstanceError::CargoError(format!("Failed to execute cargo: {e}"))
157            })?;
158
159        use std::io::{BufRead, BufReader};
160        use std::thread;
161
162        let stdout = cmd.stdout.take().expect("Failed to take stdout");
163        let stderr = cmd.stderr.take().expect("Failed to take stderr");
164
165        struct StdoutResult {
166            artifacts: Vec<Artifact>,
167            timing_infos: Vec<TimingInfo>,
168        }
169
170        // Thread for stdout: collect JSON lines, print them, and return Vec<CargoMessage>.
171        let stdout_handle = thread::spawn(move || {
172            // Parse cargo messages to extract artifacts
173            let mut artifacts = Vec::new();
174            let mut timing_infos = Vec::new();
175
176            let reader = BufReader::new(stdout);
177
178            for line_result in reader.lines() {
179                let Ok(line) = line_result else { continue };
180                let msg = match CargoMessage::parse(&line) {
181                    Ok(msg) => msg,
182                    Err(err) => {
183                        eprintln!("Failed to parse cargo message: {err}.\nLine: {line}");
184                        continue;
185                    }
186                };
187                let Some(msg) = msg else {
188                    eprintln!("Received cargo JSON message: {line}");
189                    continue;
190                };
191
192                match msg {
193                    CargoMessage::TimingInfo(timing_info) => {
194                        timing_infos.push(timing_info);
195                    }
196                    CargoMessage::CompilerArtifact(artifact) => {
197                        let kind = {
198                            // Try to guess artifact kind from its file extension (best effort).
199                            let path = &artifact
200                                .filenames
201                                .first()
202                                .expect("No filename in CompilerArtifact");
203                            if let Some(ext) = path.extension() {
204                                match ext {
205                                    "rlib" | "lib" => ArtifactKind::Library,
206                                    "dylib" | "so" | "dll" => ArtifactKind::DynLib,
207                                    "exe" | "bin" => ArtifactKind::Binary,
208                                    _ => ArtifactKind::Binary, // fallback
209                                }
210                            } else {
211                                ArtifactKind::Binary
212                            }
213                        };
214
215                        for filename in &artifact.filenames {
216                            let artifact_struct = Artifact {
217                                kind,
218                                name: artifact.crate_name.clone(),
219                                path: filename.clone(),
220                            };
221                            trace!(
222                                "Found artifact: {:?} - {} at {}",
223                                artifact_struct.kind,
224                                artifact_struct.name,
225                                filename
226                            );
227                            artifacts.push(artifact_struct);
228                        }
229                    }
230                }
231            }
232
233            StdoutResult {
234                artifacts,
235                timing_infos,
236            }
237        });
238
239        // Thread for stderr: print to parent's stderr, but DO NOT collect lines.
240        let stderr_handle = thread::spawn(move || {
241            let reader = BufReader::new(stderr);
242            for line in reader.lines().map_while(Result::ok) {
243                eprintln!("{line}");
244            }
245        });
246
247        // Wait for the command to finish
248        let status = cmd.wait().map_err(|e| {
249            error!("Failed to wait for cargo: {e}");
250            SubstanceError::CargoError(format!("Failed to wait for cargo: {e}"))
251        })?;
252
253        let wall_duration = before_build.elapsed();
254
255        // Wait for both threads to finish reading
256        let stdout_result = stdout_handle.join().unwrap();
257        let _ = stderr_handle.join();
258
259        if !status.success() {
260            error!("Cargo build failed with status: {status:?}");
261            // Stderr was already streamed, so we don't print it again here
262            return Err(SubstanceError::CargoBuildFailed);
263        }
264
265        info!("Cargo build completed successfully");
266
267        // Collect rlib paths from artifacts
268        let mut rlib_paths: Vec<(CrateName, Utf8PathBuf)> = Vec::new();
269        let mut dep_crates = Vec::new();
270        for artifact in &stdout_result.artifacts {
271            dep_crates.push(artifact.name.clone());
272
273            if matches!(artifact.kind, ArtifactKind::Library) {
274                rlib_paths.push((artifact.name.clone(), artifact.path.clone()));
275            }
276        }
277
278        dep_crates.dedup();
279        dep_crates.sort();
280
281        // Get std crates - always collect them since we can't tell if build-std was used from JSON
282        let target_dylib_path = stdlibs_dir()?;
283        let std_paths = collect_rlib_paths(&target_dylib_path);
284
285        let mut std_crates: Vec<CrateName> = std_paths.iter().map(|v| v.0.clone()).collect();
286        rlib_paths.extend_from_slice(&std_paths);
287        std_crates.sort();
288
289        // Remove std crates that were explicitly added as dependencies.
290        for c in &dep_crates {
291            if let Some(idx) = std_crates.iter().position(|v| v == c) {
292                std_crates.remove(idx);
293            }
294        }
295
296        // Build symbol mapping
297        info!("Building dependency symbol mapping...");
298        let deps_symbols = collect_deps_symbols(rlib_paths)?;
299        debug!("Collected symbols for {} dependencies.", deps_symbols.len());
300
301        // Find the binary artifact first, filtering out build scripts
302        info!("Locating binary artifact for analysis (excluding build-script-build)...");
303        let binary_artifact = stdout_result
304            .artifacts
305            .into_iter()
306            .find(|a| {
307                matches!(a.kind, ArtifactKind::Binary) && a.name.as_str() != "build-script-build"
308            })
309            .ok_or(SubstanceError::CargoError(
310                "No binary artifact found (all were build-script-build or missing).".to_string(),
311            ))?;
312        info!(
313            "Binary artifact found: {} (path: {})",
314            binary_artifact.name, binary_artifact.path
315        );
316
317        // Get file size of the binary
318        let file_metadata = std::fs::metadata(&binary_artifact.path)
319            .map_err(|_| SubstanceError::OpenFailed(binary_artifact.path.clone()))?;
320        let file_size = ByteSize::new(file_metadata.len());
321        info!("Binary file size: {} bytes", file_size.value().yellow());
322
323        info!(
324            "Collecting self data (.text section) from binary artifact: {}",
325            binary_artifact.path.blue()
326        );
327        let raw_data = collect_self_data(&binary_artifact.path, ".text")?;
328        let text_size = ByteSize::new(raw_data.text_size);
329        debug!(
330            "Collected self data for binary artifact (.text section size: {} bytes).",
331            text_size.value().green()
332        );
333
334        let mut context = BuildContext {
335            std_crates,
336            dep_crates,
337            deps_symbols,
338            wall_duration,
339            file_size,
340            text_size,
341            crates: Default::default(),
342        };
343
344        // Analyze LLVM IR (if any) for this crate from the target dir
345        info!(
346            "Analyzing LLVM IR files (if present) in target dir: {}",
347            self.target_dir.blue()
348        );
349        let llvm_functions =
350            analyze_llvm_ir_from_target_dir(&self.target_dir).unwrap_or_else(|err| {
351                warn!(
352                    "Failed to analyze LLVM IR files: {}. Continuing without LLVM IR data.",
353                    err.red()
354                );
355                HashMap::new()
356            });
357
358        info!(
359            "LLVM IR analysis: found {} LLVM functions.",
360            llvm_functions.len().bright_purple()
361        );
362
363        // Compute build times per crate.
364        let mut crate_build_times: HashMap<CrateName, Duration> = HashMap::new();
365        for timing in &stdout_result.timing_infos {
366            let crate_name = timing
367                .target
368                .name
369                .clone()
370                .map(CrateName::from)
371                .unwrap_or_else(|| CrateName::from("unknown"));
372            crate_build_times
373                .entry(crate_name)
374                .or_insert_with(|| Duration::from_secs_f64(timing.duration));
375        }
376
377        // Build crate information from the collected data
378        let mut crates_map: HashMap<CrateName, Crate> = HashMap::new();
379
380        // Process binary symbols and group by crate
381        for symbol in raw_data.symbols {
382            let (crate_name, _exact) =
383                crate_name::from_sym(&context, StdHandling::Merged, &symbol.name);
384            let demangled_symbol = DemangledSymbol::from(symbol.name.complete);
385            let symbol_obj = Symbol {
386                name: demangled_symbol.clone(),
387                size: ByteSize::new(symbol.size),
388            };
389
390            crates_map
391                .entry(crate_name)
392                .or_insert_with(|| Crate {
393                    name: CrateName::from(""),
394                    symbols: HashMap::new(),
395                    llvm_functions: HashMap::new(),
396                    timing_info: None,
397                })
398                .symbols
399                .insert(demangled_symbol, symbol_obj);
400        }
401
402        // Process LLVM functions and group by crate
403        for (llvm_fn_name, llvm_fn) in llvm_functions {
404            // Extract crate name from the function path using robust logic
405            let crate_name = {
406                let crate_string = crate_name::extract_crate_from_function(&llvm_fn_name);
407                if crate_string == "unknown" {
408                    // Fallback to binary artifact name as main crate
409                    binary_artifact.name.clone()
410                } else {
411                    CrateName::from(crate_string)
412                }
413            };
414
415            // Update the LlvmFunction with its proper name
416            let mut llvm_fn_with_name = llvm_fn;
417            llvm_fn_with_name.name = llvm_fn_name.clone();
418
419            crates_map
420                .entry(crate_name)
421                .or_insert_with(|| Crate {
422                    name: CrateName::from(""),
423                    symbols: HashMap::new(),
424                    llvm_functions: HashMap::new(),
425                    timing_info: None,
426                })
427                .llvm_functions
428                .insert(llvm_fn_name, llvm_fn_with_name);
429        }
430        // Set the proper crate names, populate timing information, and collect into a Vec
431        let mut crates: Vec<Crate> = crates_map
432            .into_iter()
433            .map(|(name, mut crate_obj)| {
434                // Assign the crate name
435                crate_obj.name = name.clone();
436
437                // If we have recorded build timing for this crate, attach it
438                if let Some(dur) = crate_build_times.get(&name) {
439                    crate_obj.timing_info = Some(TimingInfo {
440                        target: crate::cargo::CargoTarget {
441                            name: Some(name.as_str().to_string()),
442                            kind: None,
443                            crate_types: None,
444                        },
445                        duration: dur.as_secs_f64(),
446                        rmeta_time: None,
447                    });
448                }
449
450                crate_obj
451            })
452            .collect();
453
454        // Sort crates by name for consistent output
455        crates.sort_by(|a, b| a.name.cmp(&b.name));
456
457        context.crates = crates;
458
459        Ok(context)
460    }
461
462    fn build_command(&self) -> Command {
463        let mut cmd = Command::new("cargo");
464        cmd.arg("build");
465
466        // Just pass additional args
467        cmd.args(&self.additional_args);
468
469        // Add required flags for analysis
470        cmd.args([
471            "--message-format=json",
472            "-Z",
473            "unstable-options",
474            "-Z",
475            "binary-dep-depinfo",
476            "-Z",
477            "checksum-freshness",
478            "--timings=json",
479            "--manifest-path",
480        ]);
481        cmd.arg(&self.manifest_path);
482        cmd.arg("--target-dir");
483        cmd.arg(&self.target_dir);
484        let rustflags = "--emit=llvm-ir -Cdebuginfo=line-tables-only -Cstrip=none";
485
486        // Set environment variables for LLVM IR, timing, and Cstrip
487        cmd.env("RUSTFLAGS", rustflags);
488        cmd.env("RUSTC_BOOTSTRAP", "1");
489        // Force colored output in cargo/rustc even if not a tty
490        cmd.env("CLICOLOR_FORCE", "1");
491
492        cmd
493    }
494}
495
496/// Finds all `.ll` files within a given directory, ignoring `build` directories.
497pub fn find_llvm_ir_files(root_dir: &Utf8Path) -> Result<Vec<Utf8PathBuf>, SubstanceError> {
498    let mut ll_files = Vec::new();
499
500    let walker = WalkBuilder::new(root_dir).build();
501
502    for entry in walker {
503        let entry = entry.map_err(|e| {
504            SubstanceError::CargoError(format!(
505                "Error iterating directory during search for .ll files: {e}"
506            ))
507        })?;
508        let path = entry.path();
509        let path = match Utf8Path::from_path(path) {
510            Some(path) => path,
511            None => {
512                eprintln!("Failed to convert path to Utf8Path: non-UTF8 path encountered");
513                continue;
514            }
515        };
516
517        // Check if the path is a file and ends with .ll
518        if path.is_file() && path.extension() == Some("ll") {
519            ll_files.push(path.to_path_buf());
520        }
521    }
522
523    Ok(ll_files)
524}