trust_rustc/
lib.rs

1//! Shared lowering/cache/mirror logic used by the `trust-rustc`
2//! (`RUSTC_WRAPPER`) and `trust-rustdoc` (`RUSTDOC`) shims.
3//!
4//! Both wrappers do the same job: given a rustc/rustdoc invocation, find
5//! the input `.rs` file, and — if it's strict-marked — lower the whole
6//! source tree into a temp directory keyed by an FNV-1a content hash, then
7//! rewrite the input path so the underlying tool sees plain Rust.
8//!
9//! The functions here are the parts that don't depend on whether we're
10//! about to exec `rustc` or `rustdoc`. The two `main.rs` files differ only
11//! in how they parse out the tool path / input arg.
12
13use anyhow::{bail, Context, Result};
14use std::env;
15use std::fs;
16use std::path::{Path, PathBuf};
17
/// Version string mixed into the cache key.
///
/// Captured from the package version at compile time, so it bumps
/// automatically with the package version; bump the package whenever
/// lowering output changes in a way that would invalidate previously-cached
/// files.
const LOWERING_VERSION: &str = env!("CARGO_PKG_VERSION");
22
/// Fingerprint of the running wrapper binary — file length XOR modification
/// time in nanoseconds since the Unix epoch — mixed into the cache key
/// (RT-86).
///
/// The package version alone stays constant across a whole dev cycle, so a
/// rebuilt wrapper with changed lowering code would otherwise reuse lowered
/// output produced by the previous build. Mixing in the binary's fingerprint
/// gives each rebuilt binary its own cache namespace.
///
/// The value is computed once per process and memoised. If the executable
/// path or its metadata cannot be read the fingerprint is `0`; an unreadable
/// modification time contributes `0` to the XOR.
fn wrapper_fingerprint() -> u64 {
    use std::sync::OnceLock;
    use std::time::UNIX_EPOCH;

    /// `None` when the executable or its metadata is unavailable.
    fn probe() -> Option<u64> {
        let exe_path = env::current_exe().ok()?;
        let exe_meta = fs::metadata(&exe_path).ok()?;
        let modified_nanos = match exe_meta.modified() {
            Ok(stamp) => stamp
                .duration_since(UNIX_EPOCH)
                .map_or(0, |age| age.as_nanos() as u64),
            Err(_) => 0,
        };
        Some(exe_meta.len() ^ modified_nanos)
    }

    static CACHED: OnceLock<u64> = OnceLock::new();
    *CACHED.get_or_init(|| probe().unwrap_or(0))
}
48
49/// FNV-1a 64-bit hash of the lowering-version string, the wrapper binary's
50/// fingerprint, and the source bytes. Fast, no deps, deterministic per
51/// wrapper build.
52pub fn source_cache_key(source: &str) -> u64 {
53    const FNV_OFFSET: u64 = 0xcbf29ce484222325;
54    const FNV_PRIME: u64 = 0x100000001b3;
55    let mut hash = FNV_OFFSET;
56    for byte in LOWERING_VERSION
57        .bytes()
58        .chain(wrapper_fingerprint().to_le_bytes())
59        .chain(source.bytes())
60    {
61        hash ^= byte as u64;
62        hash = hash.wrapping_mul(FNV_PRIME);
63    }
64    hash
65}
66
/// Result of preparing a strict-source invocation: the path to the lowered
/// crate-root file, and a `--remap-path-prefix=<cache>=<orig>` flag the
/// caller should append to the tool args so diagnostics still point at the
/// user's source.
pub struct Prepared {
    /// Path of the lowered crate-root file inside the cache directory.
    pub lowered_root: PathBuf,
    /// Ready-to-append `--remap-path-prefix=<cache>=<orig>` argument.
    pub remap_flag: String,
}
75
76/// Walk `src_dir` recursively, parsing every `.rs` file with `syn` and
77/// collecting `(fn_name, [param_names...])` for every module-level `fn`
78/// definition (free fns, `pub` or otherwise; module-nested `fn`s
79/// included). Used by [`prepare_strict_input`] to build a crate-wide
80/// callee registry so cross-file named-arg call sites resolve (RT-40).
81///
82/// **What's covered:** plain free fns at module level, including inside
83/// `mod foo { ... }` blocks within a single file. **What's not:** trait
84/// methods, `impl` methods, and fns inside file-mod descendants that
85/// `syn::parse_file` can't reach (those will still be picked up when the
86/// file itself is parsed, because the recursive walk visits every `.rs`).
87///
88/// **Ambiguity policy:** if two files declare a fn with the same name
89/// but different param lists, the name is dropped from the index — same
90/// behaviour as the in-file collector. Dropping is safer than guessing
91/// which signature the caller meant.
92///
93/// Parse errors and unreadable files are silently skipped — the wrapper
94/// stays best-effort.  The downstream lowering pass will surface real
95/// errors on the file that actually has the syntax problem.
96pub fn collect_crate_callees(src_dir: &Path) -> Vec<(String, Vec<String>)> {
97    use std::collections::{HashMap, HashSet};
98    let mut sigs: HashMap<String, Vec<String>> = HashMap::new();
99    let mut ambiguous: HashSet<String> = HashSet::new();
100    let mut visited: HashSet<PathBuf> = HashSet::new();
101    collect_crate_callees_recursive(src_dir, &mut sigs, &mut ambiguous, &mut visited);
102    let mut out: Vec<(String, Vec<String>)> = sigs.into_iter().collect();
103    out.sort_by(|a, b| a.0.cmp(&b.0));
104    out
105}
106
107fn collect_crate_callees_recursive(
108    dir: &Path,
109    sigs: &mut std::collections::HashMap<String, Vec<String>>,
110    ambiguous: &mut std::collections::HashSet<String>,
111    visited: &mut std::collections::HashSet<PathBuf>,
112) {
113    if !dir.is_dir() {
114        return;
115    }
116    let Ok(read) = fs::read_dir(dir) else {
117        return;
118    };
119    for entry in read.flatten() {
120        let path = entry.path();
121        let canonical = path.canonicalize().unwrap_or_else(|_| path.clone());
122        if !visited.insert(canonical) {
123            continue;
124        }
125        if path.is_dir() {
126            collect_crate_callees_recursive(&path, sigs, ambiguous, visited);
127        } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
128            if let Ok(source) = fs::read_to_string(&path) {
129                // Pre-lower the source so syn can parse it. The crate-wide
130                // index is derived from the *lowered* signatures — but
131                // since fn signatures don't use named-arg call syntax,
132                // syn::parse_file on the raw source usually works. Try
133                // raw first; on failure, fall through to lowered.
134                let file = syn::parse_file(&source).ok().or_else(|| {
135                    trust_lower::lower(&source)
136                        .ok()
137                        .and_then(|lo| syn::parse_file(&lo.source).ok())
138                });
139                if let Some(file) = file {
140                    walk_items_for_sigs(&file.items, sigs, ambiguous);
141                }
142            }
143        }
144    }
145}
146
147fn walk_items_for_sigs(
148    items: &[syn::Item],
149    sigs: &mut std::collections::HashMap<String, Vec<String>>,
150    ambiguous: &mut std::collections::HashSet<String>,
151) {
152    for item in items {
153        match item {
154            syn::Item::Fn(f) => record_fn_sig(&f.sig, sigs, ambiguous),
155            syn::Item::Mod(m) => {
156                if let Some((_, inner)) = &m.content {
157                    walk_items_for_sigs(inner, sigs, ambiguous);
158                }
159            }
160            _ => {}
161        }
162    }
163}
164
165fn record_fn_sig(
166    sig: &syn::Signature,
167    sigs: &mut std::collections::HashMap<String, Vec<String>>,
168    ambiguous: &mut std::collections::HashSet<String>,
169) {
170    let name = sig.ident.to_string();
171    if ambiguous.contains(&name) {
172        return;
173    }
174    let mut params: Vec<String> = Vec::new();
175    for input in &sig.inputs {
176        match input {
177            syn::FnArg::Receiver(_) => {} // skip self
178            syn::FnArg::Typed(pat_type) => match &*pat_type.pat {
179                syn::Pat::Ident(pi) => params.push(pi.ident.to_string()),
180                _ => {
181                    // Non-ident pattern (destructure) — can't bind by name.
182                    sigs.remove(&name);
183                    ambiguous.insert(name);
184                    return;
185                }
186            },
187        }
188    }
189    match sigs.get(&name) {
190        Some(existing) if existing != &params => {
191            sigs.remove(&name);
192            ambiguous.insert(name);
193        }
194        Some(_) => {}
195        None => {
196            sigs.insert(name, params);
197        }
198    }
199}
200
/// Locate the input `.rs` file in a rustc/rustdoc argument list.
///
/// Returns the index of the first argument that ends in `.rs` and does not
/// start with `-`. Cargo passes exactly one `.rs` crate root per invocation;
/// flag arguments start with `-`, and a bare `-` ("read from stdin") is
/// never a match.
pub fn find_input_rs(args: &[String]) -> Option<usize> {
    args.iter()
        .position(|arg| !arg.starts_with('-') && arg.ends_with(".rs"))
}
217
218/// Whether the crate currently being compiled was opted into strict mode at
219/// the *project* level (`[package.metadata.trust] strict = true`), rather than
220/// per-file with a `#![strict]` marker.
221///
222/// `cargo-trustc` passes the set of strict package names in
223/// `TRUST_STRICT_PACKAGES` (comma-separated). Cargo sets `CARGO_PKG_NAME` for
224/// every rustc invocation — including dependencies — so gating on membership
225/// scopes forced lowering to exactly the user's own opted-in package(s) and
226/// never touches third-party crates compiled in the same build.
227pub fn crate_is_force_strict() -> bool {
228    force_strict_for(
229        env::var("TRUST_STRICT_PACKAGES").ok().as_deref(),
230        env::var("CARGO_PKG_NAME").ok().as_deref(),
231    )
232}
233
/// Pure membership check behind [`crate_is_force_strict`]: is `name` one of
/// the entries of the comma-separated `pkgs` list?
///
/// Both the name and every list entry are compared after trimming
/// whitespace. An absent list, an absent name, or a name that is empty after
/// trimming never matches — which is what keeps dependencies (whose
/// `CARGO_PKG_NAME` is not in the user's set) out of forced lowering.
fn force_strict_for(pkgs: Option<&str>, name: Option<&str>) -> bool {
    match (pkgs, name.map(str::trim)) {
        (Some(list), Some(wanted)) if !wanted.is_empty() => list
            .split(',')
            .map(str::trim)
            .any(|candidate| candidate == wanted),
        _ => false,
    }
}
245
246/// True if a file should be lowered: either it carries an explicit strict
247/// marker, or its crate was opted in at the project level.
248fn should_lower(source: &str) -> bool {
249    trust_lower::is_strict_source(source) || crate_is_force_strict()
250}
251
252/// If `input_path` is strict (per-file marker or project-level opt-in), lower
253/// the whole source tree into the cache and return the new root path +
254/// a `--remap-path-prefix` flag.
255///
256/// Returns `Ok(None)` for non-strict sources — the caller should pass the
257/// original args through to the underlying tool unchanged.
258pub fn prepare_strict_input(input_path: &Path) -> Result<Option<Prepared>> {
259    let source = match fs::read_to_string(input_path) {
260        Ok(s) => s,
261        Err(_) => return Ok(None),
262    };
263
264    if !should_lower(&source) {
265        return Ok(None);
266    }
267    let force_strict = crate_is_force_strict();
268
269    let file_name = input_path
270        .file_name()
271        .context("input path has no file name")?;
272
273    let cache_key = source_cache_key(&source);
274    let cache_root = env::temp_dir().join("trust-cache");
275    let cache_dir = cache_root.join(format!("{cache_key:016x}"));
276    let cached_file = cache_dir.join(file_name);
277
278    // RT-86: the cache directory's EXISTENCE is the validity marker, so it
279    // must appear atomically. A failed mirror used to leave a partial dir
280    // behind, and the old per-file `cached_file.exists()` check then treated
281    // it as complete on the next run — phantom passes/failures that flip
282    // depending on which run came first. Populate a private staging dir and
283    // rename it into place only after every file lowered clean.
284    if !cache_dir.exists() {
285        let staging = cache_root.join(format!(".staging-{cache_key:016x}-{}", std::process::id()));
286        let _ = fs::remove_dir_all(&staging);
287
288        let result = (|| -> Result<()> {
289            let src_dir = input_path
290                .parent()
291                .filter(|p| !p.as_os_str().is_empty())
292                .map(Path::to_path_buf)
293                .unwrap_or_else(|| PathBuf::from("."));
294
295            // RT-40: pre-scan the whole `src/` tree for `fn` definitions so
296            // cross-file named-arg call sites resolve. The wrapper is the
297            // first place that has a crate-wide view; individual `lower()`
298            // calls only see one file at a time.
299            let crate_extras = collect_crate_callees(&src_dir);
300
301            // RT-66: seed the registry with the public-fn signatures of
302            // dependencies, discovered from the `TRUST_SIGNATURE_PATH`
303            // manifests (`trust index <dep> -o …` produces them). This is
304            // what lets R0042 fire — and named args reorder — on a
305            // positional swap into a *third-party* crate. `merge` drops any
306            // name that conflicts between the crate and a dependency, so a
307            // shadowed name degrades to the positional fallback rather than
308            // a wrong reorder.
309            let dep_extras = trust_lower::sig_index::load_from_env();
310            let extras = trust_lower::sig_index::merge(&[crate_extras, dep_extras]);
311
312            let mut visited = std::collections::HashSet::new();
313            mirror_module_tree_with_extras(&src_dir, &staging, &mut visited, &extras)
314                .with_context(|| format!("mirroring src tree from {}", src_dir.display()))?;
315
316            // Defensive: if the src_dir traversal somehow didn't write the
317            // crate root (e.g. empty dir), do it directly.
318            if !staging.join(file_name).exists() {
319                let out =
320                    trust_lower::lower_with_extra_callees_forced(&source, &extras, force_strict)
321                        .with_context(|| format!("lowering {}", input_path.display()))?;
322                emit_diagnostics(&out, &source, input_path)?;
323                fs::create_dir_all(&staging)?;
324                fs::write(staging.join(file_name), &out.source)?;
325            }
326            Ok(())
327        })();
328
329        if let Err(e) = result {
330            let _ = fs::remove_dir_all(&staging);
331            return Err(e);
332        }
333
334        // Atomic publish. If another process won the race, its complete dir
335        // is just as good — discard ours.
336        if fs::rename(&staging, &cache_dir).is_err() {
337            let _ = fs::remove_dir_all(&staging);
338            if !cache_dir.exists() {
339                bail!(
340                    "could not publish lowering cache at {}",
341                    cache_dir.display()
342                );
343            }
344        }
345    }
346
347    let parent = input_path
348        .parent()
349        .filter(|p| !p.as_os_str().is_empty())
350        .map(Path::to_path_buf)
351        .unwrap_or_else(|| PathBuf::from("."));
352
353    Ok(Some(Prepared {
354        lowered_root: cached_file,
355        remap_flag: format!(
356            "--remap-path-prefix={}={}",
357            cache_dir.display(),
358            parent.display()
359        ),
360    }))
361}
362
363fn emit_diagnostics(
364    out: &trust_lower::LowerOutput,
365    original_source: &str,
366    path: &Path,
367) -> Result<()> {
368    emit_diagnostics_to(out, original_source, path, &mut std::io::stderr())
369}
370
/// True when the caller asked for machine-readable diagnostics (RT-96),
/// i.e. when `TRUST_MESSAGE_FORMAT` is set to exactly `json`.
///
/// `cargo trustc build --message-format json` sets this env var on the
/// spawned cargo process; cargo passes its environment through to every
/// rustc/rustdoc (and therefore wrapper) invocation. Users may also set
/// `TRUST_MESSAGE_FORMAT=json` directly — same effect, no flag needed.
fn message_format_is_json() -> bool {
    matches!(env::var("TRUST_MESSAGE_FORMAT").as_deref(), Ok("json"))
}
380
381/// Testable core of [`emit_diagnostics`]: collects the full `trust check`
382/// rule set for one file and writes it to `writer` — as human `[R0001]`
383/// lines by default, or as one `trust_diag::to_json` document (newline
384/// terminated) when `TRUST_MESSAGE_FORMAT=json` (RT-96). Either way, bails
385/// when any diagnostic is an error.
386fn emit_diagnostics_to(
387    out: &trust_lower::LowerOutput,
388    original_source: &str,
389    path: &Path,
390    writer: &mut impl std::io::Write,
391) -> Result<()> {
392    // RT-89: the wrapper enforces the same rule set as `trust check` — the
393    // lowering diagnostics (R0042 et al) collected in `out`, plus the
394    // AST-level strict lints (R0001 unwrap, R0003 as-cast, ...). The AST
395    // comes from the LOWERED source (plain Rust, always parses); the
396    // ORIGINAL source string is what the linter needs for comment-window
397    // rules (R0005/R0006 justifications) — prettyplease strips comments
398    // from the lowered output. Mirrors `run_pipeline` in the trust CLI.
399    let mut diagnostics = out.diagnostics.clone();
400    if out.strict_mode {
401        // lint_source, not source: the allow map comes from the
402        // `#[allow(trust::…)]` attributes, which are stripped from the
403        // rustc-facing `source`.
404        let file: syn::File = syn::parse_str(&out.lint_source)
405            .with_context(|| format!("re-parsing lowered source from {}", path.display()))?;
406        diagnostics.extend(trust_lints::lint_strict(&file, original_source, true).diagnostics);
407    }
408
409    if message_format_is_json() {
410        // RT-96: one JSON document per file, same shape as
411        // `trust check --format json` (spans index the ORIGINAL source).
412        let name = path.display().to_string();
413        let doc = trust_diag::to_json(
414            &diagnostics,
415            trust_diag::NamedSource {
416                name: &name,
417                text: original_source,
418            },
419        );
420        write!(writer, "{doc}")?;
421        if !doc.ends_with('\n') {
422            writeln!(writer)?;
423        }
424    } else {
425        for diag in &diagnostics {
426            writeln!(
427                writer,
428                "[{}] {}: {}",
429                diag.rule,
430                if diag.is_error() { "error" } else { "warning" },
431                diag.message
432            )?;
433        }
434    }
435    if diagnostics.iter().any(|d| d.is_error()) {
436        bail!("trust check failed on {}", path.display());
437    }
438    Ok(())
439}
440
441/// Files reachable only through a `#[cfg(test)] mod x;` declaration (RT-88).
442///
443/// Project-level force-strict must not apply to these: a stock-buildable
444/// library's tests routinely call its own multi-arg fns positionally, and
445/// the R0042 fix — named-arg syntax — is exactly what stock `cargo test`
446/// cannot parse. Skipping cfg(test)-only files lets such crates opt their
447/// *shipping* code into whole-package strict (trust-diag, trust-std) without
448/// rewriting their test suites in a dialect stock rustc rejects. A file that
449/// carries its own `#![strict]` marker is still lowered — explicit wins.
450///
451/// Detection is token-level (Trust syntax doesn't parse with syn): a file
452/// declares `NAME` test-only via `#[cfg(test)] (pub)? mod NAME ;`, mapping
453/// to `NAME.rs` or `NAME/mod.rs` beside it — and test-only-ness is
454/// transitive through plain `mod` declarations inside test-only files.
455pub fn collect_test_only_files(src_dir: &Path) -> std::collections::HashSet<PathBuf> {
456    use std::collections::HashSet;
457    let mut all_files: Vec<PathBuf> = Vec::new();
458    collect_rs_files(src_dir, &mut all_files);
459
460    // (declaring file, declared name, is_cfg_test)
461    let mut decls: Vec<(PathBuf, String, bool)> = Vec::new();
462    for file in &all_files {
463        let Ok(source) = fs::read_to_string(file) else {
464            continue;
465        };
466        let Ok(tokens) = source.parse::<proc_macro2::TokenStream>() else {
467            continue;
468        };
469        for (name, is_test) in file_mod_declarations(&tokens) {
470            decls.push((file.clone(), name, is_test));
471        }
472    }
473
474    let resolve = |declaring: &Path, name: &str| -> Option<PathBuf> {
475        let dir = declaring.parent()?;
476        let flat = dir.join(format!("{name}.rs"));
477        if flat.is_file() {
478            return flat.canonicalize().ok();
479        }
480        let nested = dir.join(name).join("mod.rs");
481        if nested.is_file() {
482            return nested.canonicalize().ok();
483        }
484        None
485    };
486
487    let mut test_only: HashSet<PathBuf> = HashSet::new();
488    // Seed with direct #[cfg(test)] declarations, then close transitively
489    // over plain mod declarations made from already-test-only files.
490    loop {
491        let mut grew = false;
492        for (declaring, name, is_test) in &decls {
493            let from_test_file = declaring
494                .canonicalize()
495                .map(|c| test_only.contains(&c))
496                .unwrap_or(false);
497            if !is_test && !from_test_file {
498                continue;
499            }
500            if let Some(target) = resolve(declaring, name) {
501                grew |= test_only.insert(target);
502            }
503        }
504        if !grew {
505            break;
506        }
507    }
508    test_only
509}
510
/// Append every `.rs` file found under `dir` (recursively) to `out`.
///
/// A directory that cannot be listed and entries that cannot be read are
/// skipped silently. Files are appended in the order the directory listing
/// yields them, which is platform-dependent.
fn collect_rs_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let listing = match fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };
    for candidate in listing.flatten().map(|entry| entry.path()) {
        if candidate.is_dir() {
            collect_rs_files(&candidate, out);
            continue;
        }
        let is_rust_file = candidate.extension().and_then(|ext| ext.to_str()) == Some("rs");
        if is_rust_file {
            out.push(candidate);
        }
    }
}
524
525/// Does a `cfg(...)` argument list make the item test-only — i.e. is `test`
526/// present as a POSITIVE predicate? `test` and `all(unix, test)` qualify;
527/// `not(test)` and `all(unix, not(test))` do not (those select NON-test
528/// builds, so exempting them would skip lowering/linting in production
529/// compiles — PR #1 review finding). `not(...)` subtrees are never recursed
530/// into; `any(...)`/`all(...)` are.
531fn cfg_args_positively_test(tokens: &proc_macro2::TokenStream) -> bool {
532    use proc_macro2::{Delimiter, TokenTree};
533    let trees: Vec<TokenTree> = tokens.clone().into_iter().collect();
534    let mut i = 0;
535    while i < trees.len() {
536        match &trees[i] {
537            TokenTree::Ident(id) if *id == "not" => {
538                // Skip the negated group entirely.
539                if matches!(trees.get(i + 1), Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis)
540                {
541                    i += 2;
542                    continue;
543                }
544                i += 1;
545            }
546            TokenTree::Ident(id) if *id == "any" || *id == "all" => {
547                if let Some(TokenTree::Group(g)) = trees.get(i + 1) {
548                    if g.delimiter() == Delimiter::Parenthesis
549                        && cfg_args_positively_test(&g.stream())
550                    {
551                        return true;
552                    }
553                    i += 2;
554                    continue;
555                }
556                i += 1;
557            }
558            // Bare `test` predicate — not the LHS of `name = "value"` (the
559            // RHS of those is a Literal, so an Ident named test here is the
560            // predicate form).
561            TokenTree::Ident(id) if *id == "test" => {
562                let followed_by_eq = matches!(
563                    trees.get(i + 1),
564                    Some(TokenTree::Punct(p)) if p.as_char() == '='
565                );
566                if !followed_by_eq {
567                    return true;
568                }
569                i += 1;
570            }
571            _ => i += 1,
572        }
573    }
574    false
575}
576
577/// Top-level `mod NAME ;` declarations in a token stream, with whether the
578/// directly-preceding attribute run contains a positively-`test` cfg.
579fn file_mod_declarations(tokens: &proc_macro2::TokenStream) -> Vec<(String, bool)> {
580    use proc_macro2::{Delimiter, TokenTree};
581    let trees: Vec<TokenTree> = tokens.clone().into_iter().collect();
582    let mut out = Vec::new();
583    let mut i = 0;
584    let mut pending_cfg_test = false;
585    while i < trees.len() {
586        match &trees[i] {
587            // Attribute: `#` `[ ... ]` — note whether it's cfg(test).
588            TokenTree::Punct(p) if p.as_char() == '#' => {
589                if let Some(TokenTree::Group(g)) = trees.get(i + 1) {
590                    if g.delimiter() == Delimiter::Bracket {
591                        let inner: Vec<TokenTree> = g.stream().into_iter().collect();
592                        if let [TokenTree::Ident(name), TokenTree::Group(args)] = inner.as_slice() {
593                            if *name == "cfg" {
594                                pending_cfg_test |= cfg_args_positively_test(&args.stream());
595                            }
596                        }
597                        i += 2;
598                        continue;
599                    }
600                }
601                i += 1;
602            }
603            // `pub` (and `pub(...)`) between attrs and `mod` — skip.
604            TokenTree::Ident(id) if *id == "pub" => {
605                i += 1;
606                if let Some(TokenTree::Group(g)) = trees.get(i) {
607                    if g.delimiter() == Delimiter::Parenthesis {
608                        i += 1;
609                    }
610                }
611            }
612            TokenTree::Ident(id) if *id == "mod" => {
613                if let (Some(TokenTree::Ident(name)), Some(TokenTree::Punct(semi))) =
614                    (trees.get(i + 1), trees.get(i + 2))
615                {
616                    if semi.as_char() == ';' {
617                        out.push((name.to_string(), pending_cfg_test));
618                    }
619                }
620                pending_cfg_test = false;
621                i += 1;
622            }
623            _ => {
624                pending_cfg_test = false;
625                i += 1;
626            }
627        }
628    }
629    out
630}
631
632/// Recursively mirror the source tree rooted at `src_dir` into `dest_dir`,
633/// lowering strict-marked `.rs` files and hard-linking/copying others.
634pub fn mirror_module_tree(
635    src_dir: &Path,
636    dest_dir: &Path,
637    already_done: &mut std::collections::HashSet<PathBuf>,
638) -> Result<()> {
639    mirror_module_tree_with_extras(src_dir, dest_dir, already_done, &[])
640}
641
642/// Variant of [`mirror_module_tree`] that threads a crate-wide list of
643/// `(fn_name, params)` entries into every per-file lowering call. Used by
644/// `prepare_strict_input` to resolve cross-file named-arg call sites
645/// (RT-40).
646pub fn mirror_module_tree_with_extras(
647    src_dir: &Path,
648    dest_dir: &Path,
649    already_done: &mut std::collections::HashSet<PathBuf>,
650    extras: &[(String, Vec<String>)],
651) -> Result<()> {
652    // RT-88: under project-level force-strict, cfg(test)-only files keep
653    // their plain-Rust form (see collect_test_only_files). Computed once per
654    // mirror at the root call.
655    let test_only = if crate_is_force_strict() {
656        collect_test_only_files(src_dir)
657    } else {
658        std::collections::HashSet::new()
659    };
660    mirror_inner(src_dir, dest_dir, already_done, extras, &test_only)
661}
662
663fn mirror_inner(
664    src_dir: &Path,
665    dest_dir: &Path,
666    already_done: &mut std::collections::HashSet<PathBuf>,
667    extras: &[(String, Vec<String>)],
668    test_only: &std::collections::HashSet<PathBuf>,
669) -> Result<()> {
670    if !src_dir.is_dir() {
671        return Ok(());
672    }
673    fs::create_dir_all(dest_dir).with_context(|| format!("creating {}", dest_dir.display()))?;
674
675    for entry in
676        fs::read_dir(src_dir).with_context(|| format!("reading dir {}", src_dir.display()))?
677    {
678        let entry = entry?;
679        let path = entry.path();
680        let dest = dest_dir.join(entry.file_name());
681
682        let canonical = path.canonicalize().unwrap_or_else(|_| path.clone());
683        let is_test_only = test_only.contains(&canonical);
684        if !already_done.insert(canonical) {
685            continue;
686        }
687
688        if path.is_dir() {
689            mirror_inner(&path, &dest, already_done, extras, test_only)?;
690        } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
691            let source =
692                fs::read_to_string(&path).with_context(|| format!("reading {}", path.display()))?;
693            // Explicit #![strict] always lowers; force-strict lowers
694            // everything except cfg(test)-only files (RT-88).
695            let lower_this = trust_lower::is_strict_source(&source)
696                || (crate_is_force_strict() && !is_test_only);
697            if lower_this {
698                let out = trust_lower::lower_with_extra_callees_forced(
699                    &source,
700                    extras,
701                    crate_is_force_strict(),
702                )
703                .with_context(|| format!("lowering {}", path.display()))?;
704                emit_diagnostics(&out, &source, &path)?;
705                // Also lower any doc-test code blocks embedded in `///` /
706                // `//!` comments. rustdoc extracts these snippets verbatim
707                // and submits them to rustc; if they contain named-arg
708                // syntax they'd fail on stable. Best-effort: leave blocks
709                // we can't parse untouched (e.g. `ignore`/`text` fences,
710                // or partial snippets that don't parse standalone).
711                let rewritten = lower_doctests_in_source(&out.source);
712                let tmp = dest_dir.join(format!(
713                    ".{}.{}.tmp",
714                    entry.file_name().to_string_lossy(),
715                    std::process::id()
716                ));
717                fs::write(&tmp, &rewritten)?;
718                fs::rename(&tmp, &dest)?;
719            } else {
720                // RT-75: COPY, never hard-link. A hard link shares the inode
721                // with the source file, so any later write/truncate of the
722                // cached copy would destroy the user's original `.rs`.
723                fs::copy(&path, &dest).with_context(|| format!("copying {}", path.display()))?;
724            }
725        } else {
726            // RT-75: non-`.rs` sibling files — copy (best-effort), never
727            // hard-link, for the same inode-sharing reason as above.
728            let _ = fs::copy(&path, &dest);
729        }
730    }
731    Ok(())
732}
733
734/// Lower Trust syntax inside doc-test code blocks (`/// ```...```` ` and
735/// `//! ```...```` `) so `rustdoc --test` doesn't choke when rustc compiles
736/// each snippet on stable. Used by the mirror pass after the file itself
737/// has been lowered.
738///
739/// Strategy: walk the source line-by-line, find runs of doc-comment lines
740/// (`///` or `//!`), then within each run locate ```` ``` ```` fences. The
741/// fence info-string is treated as a doc-test if it's empty or starts with
742/// `rust` (mirroring rustdoc's own classification). Non-test fences
743/// (`text`, `ignore`, `compile_fail`, …) are left alone — rustdoc won't
744/// hand them to rustc anyway, and `compile_fail` tests intentionally don't
745/// compile, so re-lowering them could hide the intended failure.
746///
747/// For each test snippet we try two parse strategies:
748///   1. Lower the snippet as-is (it's already a valid Rust file).
749///   2. If that fails, wrap in `fn __doctest() { … }` and lower; on
750///      success, strip the wrapper.
751///
752/// If both fail (snippet doesn't parse standalone — e.g. it's only an
753/// expression, or has hidden `#`-prefixed lines), we leave the block
754/// unchanged. The doc-test will fail at rustc time with a clearer error
755/// than anything we could produce.
756pub fn lower_doctests_in_source(source: &str) -> String {
757    let mut out = String::with_capacity(source.len());
758    let lines: Vec<&str> = source.lines().collect();
759    let mut i = 0;
760    while i < lines.len() {
761        let (Some(prefix), Some(_)) = (doc_prefix(lines[i]), doc_body(lines[i])) else {
762            out.push_str(lines[i]);
763            out.push('\n');
764            i += 1;
765            continue;
766        };
767        // Collect this doc-comment block (consecutive lines with the same prefix).
768        let block_start = i;
769        while i < lines.len() && doc_prefix(lines[i]) == Some(prefix) {
770            i += 1;
771        }
772        let block_end = i;
773        let block = rewrite_doc_block(&lines[block_start..block_end], prefix);
774        out.push_str(&block);
775        // `rewrite_doc_block` always ends with a newline-per-line layout.
776    }
777    out
778}
779
/// Return the doc-comment marker (`///` or `//!`) that `line` starts with
/// once leading whitespace is ignored, or `None` for any other line.
fn doc_prefix(line: &str) -> Option<&'static str> {
    const MARKERS: [&str; 2] = ["///", "//!"];
    let rest = line.trim_start();
    MARKERS
        .iter()
        .copied()
        .find(|marker| rest.starts_with(*marker))
}
790
/// Return the text of a doc-comment line after its `///` or `//!` marker,
/// with at most one separating space removed. Lines that are not doc
/// comments yield `None`.
fn doc_body(line: &str) -> Option<&str> {
    let rest = line.trim_start();
    let after_marker = match rest.strip_prefix("///") {
        Some(tail) => tail,
        None => rest.strip_prefix("//!")?,
    };
    // Only the first space is a separator; further indentation is content.
    Some(match after_marker.strip_prefix(' ') {
        Some(text) => text,
        None => after_marker,
    })
}
798
799/// Rewrite a contiguous doc-comment block, transforming code-fenced
800/// doc-test snippets through `trust_lower::lower`.
801fn rewrite_doc_block(lines: &[&str], prefix: &str) -> String {
802    // Extract the indent of the first line so we can reproduce it.
803    let first = lines[0];
804    let indent_len = first.len() - first.trim_start().len();
805    let indent = &first[..indent_len];
806
807    // Walk lines; when we hit a fence inside a doc-test block, buffer
808    // the code lines, lower the buffer, then splice the lowered text
809    // back as new doc-comment lines.
810    let mut out = String::new();
811    let mut in_block = false;
812    let mut is_test_block = false;
813    let mut code_buf = String::new();
814    let mut block_indent_after_prefix = String::new();
815
816    for line in lines {
817        let body = doc_body(line).unwrap_or("");
818        let body_trim = body.trim_start();
819
820        if body_trim.starts_with("```") {
821            if !in_block {
822                // Opening fence. Decide if this is a doc-test fence.
823                let info = body_trim.trim_start_matches('`').trim();
824                is_test_block = info.is_empty()
825                    || info == "rust"
826                    || info.starts_with("rust,")
827                    || info.starts_with("rust ");
828                in_block = true;
829                code_buf.clear();
830                block_indent_after_prefix.clear();
831                // Capture the indentation that lives *between* `///` and
832                // the visible body, so we can reproduce it on output.
833                if let Some(stripped) = line.trim_start().strip_prefix(prefix) {
834                    let after = stripped;
835                    let extra_indent_len = after.len() - after.trim_start().len();
836                    block_indent_after_prefix = after[..extra_indent_len].to_string();
837                }
838                out.push_str(line);
839                out.push('\n');
840                continue;
841            }
842            // Closing fence: flush the buffered code (lowered if possible).
843            let lowered = if is_test_block {
844                try_lower_doctest(&code_buf).unwrap_or_else(|| code_buf.clone())
845            } else {
846                code_buf.clone()
847            };
848            for code_line in lowered.lines() {
849                out.push_str(indent);
850                out.push_str(prefix);
851                if !code_line.is_empty() {
852                    if block_indent_after_prefix.is_empty() {
853                        out.push(' ');
854                    } else {
855                        out.push_str(&block_indent_after_prefix);
856                    }
857                }
858                out.push_str(code_line);
859                out.push('\n');
860            }
861            out.push_str(line);
862            out.push('\n');
863            in_block = false;
864            code_buf.clear();
865            continue;
866        }
867
868        if in_block {
869            // Accumulate the raw body (minus the doc prefix + one space).
870            code_buf.push_str(body);
871            code_buf.push('\n');
872        } else {
873            out.push_str(line);
874            out.push('\n');
875        }
876    }
877
878    // Unclosed fence — emit the buffer verbatim to avoid losing content.
879    if in_block {
880        for code_line in code_buf.lines() {
881            out.push_str(indent);
882            out.push_str(prefix);
883            out.push(' ');
884            out.push_str(code_line);
885            out.push('\n');
886        }
887    }
888    out
889}
890
891/// Try to lower a doc-test snippet. Returns `Some(lowered)` if the
892/// rewriter produced new source; `None` if the snippet doesn't parse
893/// standalone (leave unchanged in that case).
894fn try_lower_doctest(snippet: &str) -> Option<String> {
895    // Strategy 1: snippet is a full Rust file (contains `fn main`, items, etc.).
896    if let Ok(out) = trust_lower::lower(snippet) {
897        if !out.diagnostics.iter().any(|d| d.is_error()) {
898            return Some(strip_hidden_doctest_prefix(out.source));
899        }
900    }
901    // Strategy 2: wrap as `fn __d() { … }` (snippet is a stmt sequence).
902    let wrapped = format!("fn __trust_doctest() {{\n{snippet}\n}}\n");
903    let out = trust_lower::lower(&wrapped).ok()?;
904    if out.diagnostics.iter().any(|d| d.is_error()) {
905        return None;
906    }
907    // Strip the wrapper. prettyplease emits a stable shape:
908    //     fn __trust_doctest() {
909    //         <body>
910    //     }
911    let unwrapped = unwrap_doctest_fn(&out.source)?;
912    Some(unwrapped)
913}
914
/// Extract the body of the `fn __trust_doctest() { … }` wrapper from
/// lowered source and return it dedented, one `\n`-terminated line per
/// body line.
///
/// Returns `None` when the wrapper header or its matching close brace
/// cannot be found. An empty (or all-blank) body yields `Some("")`.
///
/// Leading and trailing blank lines are dropped, and the common leading
/// indentation of the remaining non-blank lines (spaces and tabs only) is
/// removed. Blank lines inside the body are emitted as empty lines.
fn unwrap_doctest_fn(source: &str) -> Option<String> {
    let start = source.find("fn __trust_doctest()")?;
    let open = start + source[start..].find('{')?;
    let close = doctest_wrapper_close(source, open)?;
    let body: Vec<&str> = source[open + 1..close].lines().collect();

    // Trim blank lines at both ends; a body with no content is empty.
    let Some(first) = body.iter().position(|l| !l.trim().is_empty()) else {
        return Some(String::new());
    };
    let last = body
        .iter()
        .rposition(|l| !l.trim().is_empty())
        .unwrap_or(first);
    let body = &body[first..=last];

    // Measure indentation in ASCII spaces/tabs only, so the dedent offset is
    // always a valid char boundary on every non-blank line.
    let dedent = body
        .iter()
        .filter(|l| !l.trim().is_empty())
        .map(|l| l.len() - l.trim_start_matches(|c: char| c == ' ' || c == '\t').len())
        .min()
        .unwrap_or(0);

    let mut out = String::with_capacity(source.len());
    for line in body {
        // `get` only misses on whitespace-only lines shorter than (or not
        // sliceable at) the dedent; those become empty lines.
        out.push_str(line.get(dedent..).unwrap_or(""));
        out.push('\n');
    }
    Some(out)
}

/// Find the byte offset of the `}` that closes the `{` at offset `open`.
///
/// Braces inside line comments, (nested) block comments, string literals,
/// raw string literals and char literals are ignored, so a snippet that
/// contains e.g. `"}"` or `'{'` does not end the wrapper early. Returns
/// `None` if the braces never balance or a string literal is unterminated.
fn doctest_wrapper_close(source: &str, open: usize) -> Option<usize> {
    let bytes = source.as_bytes();
    let mut depth = 0usize;
    let mut i = open;
    while i < bytes.len() {
        match bytes[i] {
            b'{' => depth += 1,
            b'}' => {
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(i);
                }
            }
            b'/' if bytes.get(i + 1) == Some(&b'/') => {
                // Line comment (including `///` docs): skip to end of line.
                i = source[i..].find('\n').map_or(bytes.len(), |off| i + off);
                continue;
            }
            b'/' if bytes.get(i + 1) == Some(&b'*') => {
                // Block comments nest in Rust.
                let mut nesting = 1usize;
                i += 2;
                while nesting > 0 && i < bytes.len() {
                    match (bytes[i], bytes.get(i + 1)) {
                        (b'/', Some(&b'*')) => {
                            nesting += 1;
                            i += 2;
                        }
                        (b'*', Some(&b'/')) => {
                            nesting -= 1;
                            i += 2;
                        }
                        _ => i += 1,
                    }
                }
                continue;
            }
            b'"' => {
                // A quote reached here always opens a literal. It is a raw
                // string when preceded by `r` plus zero or more `#`.
                let hashes = bytes[..i].iter().rev().take_while(|&&b| b == b'#').count();
                let is_raw = match &bytes[..i - hashes] {
                    [.., b'b' | b'c', b'r'] | [b'r'] => true,
                    [.., before, b'r'] => !(before.is_ascii_alphanumeric() || *before == b'_'),
                    _ => false,
                };
                let mut j = i + 1;
                if is_raw {
                    // Ends at the first `"` followed by the same number of `#`.
                    loop {
                        let quote = j + bytes[j..].iter().position(|&b| b == b'"')?;
                        let end = quote + 1 + hashes;
                        let closes = bytes
                            .get(quote + 1..end)
                            .is_some_and(|tail| tail.iter().all(|&b| b == b'#'));
                        if closes {
                            j = end;
                            break;
                        }
                        j = quote + 1;
                    }
                } else {
                    // Ordinary string: honour backslash escapes.
                    loop {
                        match *bytes.get(j)? {
                            b'\\' => j += 2,
                            b'"' => break,
                            _ => j += 1,
                        }
                    }
                    j += 1;
                }
                i = j;
                continue;
            }
            b'\'' => {
                i = if bytes.get(i + 1) == Some(&b'\\') {
                    // Escaped char literal such as '\'' or '\u{7d}': the byte
                    // right after the backslash can never be the closer.
                    let mut j = i + 3;
                    while j < bytes.len() && bytes[j] != b'\'' {
                        j += 1;
                    }
                    j + 1
                } else {
                    match source[i + 1..].chars().next() {
                        // One char then a quote: a char literal like '{'.
                        Some(ch) if bytes.get(i + 1 + ch.len_utf8()) == Some(&b'\'') => {
                            i + 2 + ch.len_utf8()
                        }
                        // Otherwise a lifetime or loop label.
                        _ => i + 1,
                    }
                };
                continue;
            }
            _ => {}
        }
        i += 1;
    }
    None
}
964
/// Placeholder hook for rustdoc's hidden-line markers; currently returns
/// its input unchanged.
///
/// rustdoc treats doc-test lines beginning with `# ` (after the doc-comment
/// prefix) as hidden setup. The snippet body is handed to the lowering pass
/// without any special handling of those markers, and they cannot be
/// reconstructed from the lowered output afterwards, so there is nothing to
/// strip or restore here yet.
///
/// NOTE(review): the previous comment claimed the whole-file strategy
/// silently drops `#`-prefixed lines that aren't valid syntax — confirm
/// against `trust_lower::lower` before relying on that.
fn strip_hidden_doctest_prefix(s: String) -> String {
    s
}
973
#[cfg(test)]
mod tests {
    use super::*;

    /// Held by every test that reads or writes `TRUST_MESSAGE_FORMAT`: the
    /// environment is process-wide and tests run on parallel threads.
    static MESSAGE_FORMAT_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Strict-marked program whose `unwrap()` call is reported as R0001.
    const UNWRAP_IN_STRICT: &str =
        "#![strict]\nfn main() { let v: Option<i32> = Some(1); let _ = v.unwrap(); }\n";

    /// Scoped override of `TRUST_MESSAGE_FORMAT`. Construction takes
    /// [`MESSAGE_FORMAT_LOCK`] and applies the override; dropping the guard
    /// puts the previous value back before the lock is released.
    struct MessageFormatGuard<'a> {
        prev: Option<String>,
        _lock: std::sync::MutexGuard<'a, ()>,
    }

    impl MessageFormatGuard<'_> {
        fn set(value: Option<&str>) -> Self {
            // A poisoned lock only means another test panicked while holding
            // it; the `()` payload has no state to be left inconsistent.
            let lock = match MESSAGE_FORMAT_LOCK.lock() {
                Ok(held) => held,
                Err(poisoned) => poisoned.into_inner(),
            };
            let prev = env::var("TRUST_MESSAGE_FORMAT").ok();
            if let Some(v) = value {
                env::set_var("TRUST_MESSAGE_FORMAT", v);
            } else {
                env::remove_var("TRUST_MESSAGE_FORMAT");
            }
            Self { prev, _lock: lock }
        }
    }

    impl Drop for MessageFormatGuard<'_> {
        fn drop(&mut self) {
            if let Some(original) = self.prev.as_deref() {
                env::set_var("TRUST_MESSAGE_FORMAT", original);
            } else {
                env::remove_var("TRUST_MESSAGE_FORMAT");
            }
        }
    }

    /// Lower [`UNWRAP_IN_STRICT`] and emit its diagnostics into a buffer.
    /// Returns whether emission reported an error, plus the raw bytes
    /// written. The caller must already hold a [`MessageFormatGuard`].
    fn emit_unwrap_diagnostics() -> (bool, Vec<u8>) {
        let lowered = trust_lower::lower(UNWRAP_IN_STRICT).expect("lowering strict source");
        let mut sink: Vec<u8> = Vec::new();
        let bailed = emit_diagnostics_to(
            &lowered,
            UNWRAP_IN_STRICT,
            Path::new("src/main.rs"),
            &mut sink,
        )
        .is_err();
        (bailed, sink)
    }

    /// RT-96: under `TRUST_MESSAGE_FORMAT=json` the wrapper writes a single
    /// machine-readable JSON document per file (the shape used by
    /// `trust check --format json`) rather than human `[R0001]` lines, and
    /// it still fails because the diagnostic is an error.
    #[test]
    fn json_message_format_emits_parseable_document() {
        let _guard = MessageFormatGuard::set(Some("json"));

        let (bailed, raw) = emit_unwrap_diagnostics();
        assert!(bailed, "R0001 is an error — must still bail");

        let text = String::from_utf8(raw).expect("utf8 output");
        let doc: serde_json::Value =
            serde_json::from_str(text.trim()).expect("output must be valid JSON");
        assert_eq!(doc["file"], "src/main.rs");

        let diagnostics = doc["diagnostics"].as_array().expect("diagnostics array");
        let rules: Vec<&str> = diagnostics
            .iter()
            .filter_map(|d| d["rule"].as_str())
            .collect();
        assert!(rules.contains(&"R0001"), "expected R0001 in {rules:?}");
    }

    /// With the variable unset, diagnostics keep the human-readable form.
    #[test]
    fn default_message_format_is_human_lines() {
        let _guard = MessageFormatGuard::set(None);

        let (bailed, raw) = emit_unwrap_diagnostics();
        assert!(bailed);

        let text = String::from_utf8(raw).expect("utf8 output");
        assert!(
            text.contains("[R0001] error:"),
            "expected human line, got: {text}"
        );
    }

    /// RT-88: a file reachable only through `#[cfg(test)] mod x;` is exempt
    /// from force-strict, and so is anything it pulls in through plain
    /// `mod` declarations. Normally-declared files are not exempt.
    #[test]
    fn cfg_test_mod_files_are_detected_transitively() {
        let base = std::env::temp_dir().join(format!("trust-rt88-{}", std::process::id()));
        let src = base.join("src");
        let _ = fs::remove_dir_all(&base);
        fs::create_dir_all(&src).unwrap();
        for (name, contents) in [
            (
                "main.rs",
                "mod shipping;\n#[cfg(test)]\nmod tests;\nfn main() {}\n",
            ),
            ("shipping.rs", "pub fn ship() {}\n"),
            ("tests.rs", "mod helpers;\nfn t() {}\n"),
            ("helpers.rs", "pub fn helper() {}\n"),
        ] {
            fs::write(src.join(name), contents).unwrap();
        }

        let test_only = collect_test_only_files(&src);
        let has = |name: &str| {
            test_only
                .iter()
                .filter_map(|p| p.file_name())
                .any(|f| f.to_str() == Some(name))
        };
        assert!(has("tests.rs"), "directly cfg(test)-declared file");
        assert!(has("helpers.rs"), "transitively reached through tests.rs");
        assert!(!has("shipping.rs"), "normal mod stays enforced");
        assert!(!has("main.rs"), "the crate root is never test-only");

        let _ = fs::remove_dir_all(&base);
    }

    /// PR #1 review regression: negated test predicates such as
    /// `#[cfg(not(test))]` select PRODUCTION builds and must never be
    /// exempted from force-strict, while positive `test` predicates (bare,
    /// or nested in `any`/`all`) are exempt.
    #[test]
    fn negated_test_cfgs_are_not_test_only() {
        let base = std::env::temp_dir().join(format!("trust-pr1-{}", std::process::id()));
        let src = base.join("src");
        let _ = fs::remove_dir_all(&base);
        fs::create_dir_all(&src).unwrap();

        let crate_root = concat!(
            "#[cfg(not(test))]\nmod prod;\n",
            "#[cfg(all(unix, not(test)))]\nmod prod_unix;\n",
            "#[cfg(all(unix, test))]\nmod unix_tests;\n",
            "#[cfg(test)]\nmod tests;\n",
            "#[cfg(feature = \"test\")]\nmod feature_named_test;\n",
            "fn main() {}\n",
        );
        fs::write(src.join("main.rs"), crate_root).unwrap();

        let module_files = [
            "prod.rs",
            "prod_unix.rs",
            "unix_tests.rs",
            "tests.rs",
            "feature_named_test.rs",
        ];
        for name in module_files {
            fs::write(src.join(name), "pub fn x() {}\n").unwrap();
        }

        let test_only = collect_test_only_files(&src);
        let has = |name: &str| {
            test_only
                .iter()
                .filter_map(|p| p.file_name())
                .any(|f| f.to_str() == Some(name))
        };
        assert!(!has("prod.rs"), "cfg(not(test)) is a production module");
        assert!(!has("prod_unix.rs"), "all(unix, not(test)) is production");
        assert!(has("unix_tests.rs"), "all(unix, test) is test-only");
        assert!(has("tests.rs"), "plain cfg(test) is test-only");
        assert!(
            !has("feature_named_test.rs"),
            "feature = \"test\" is a feature gate, not the test predicate"
        );

        let _ = fs::remove_dir_all(&base);
    }

    /// RT-81: project-level strict covers only the packages the user opted
    /// in, never dependencies that the same wrapper happens to compile.
    #[test]
    fn force_strict_is_scoped_by_package_name() {
        // A package listed in the set is forced strict.
        assert!(force_strict_for(Some("my-app"), Some("my-app")));
        // A dependency carries its own CARGO_PKG_NAME, which is not in the
        // set, so it is never force-lowered.
        assert!(!force_strict_for(Some("my-app"), Some("serde")));
        // Several packages, with stray whitespace around the separators.
        assert!(force_strict_for(Some("a, b ,c"), Some("b")));
        // A missing set or a missing name never matches.
        assert!(!force_strict_for(None, Some("my-app")));
        assert!(!force_strict_for(Some("my-app"), None));
        // The empty element after a trailing comma must not match an empty
        // package name.
        assert!(!force_strict_for(Some("a,"), Some("")));
    }

    /// RT-75 regression: the cache mirror has to COPY non-strict files, not
    /// hard-link them. A hard link shares the inode, so overwriting the
    /// cached file would truncate the user's original. The test mirrors a
    /// plain file, empties the cached copy, and checks the source survived.
    #[test]
    fn mirror_copies_rather_than_hardlinks_source() {
        let base = std::env::temp_dir().join(format!("trust-rt75-{}", std::process::id()));
        let src = base.join("src");
        let dest = base.join("cache");
        let _ = fs::remove_dir_all(&base);
        fs::create_dir_all(&src).expect("create src");
        let original = src.join("plain.rs");
        fs::write(&original, "pub fn keep() {}\n").expect("write src");

        let mut visited = std::collections::HashSet::new();
        mirror_module_tree(&src, &dest, &mut visited).expect("mirror");

        // Truncate the cached copy to zero bytes.
        let cached = dest.join("plain.rs");
        fs::write(cached, "").expect("clobber cache");

        // If the source kept its contents, the mirror made a real copy.
        let after = fs::read_to_string(&original).expect("read src after");
        assert_eq!(
            after, "pub fn keep() {}\n",
            "source file was corrupted — cache shares an inode with it"
        );
        let _ = fs::remove_dir_all(&base);
    }
}
trust_rustc/lib.rs

trust_rustc/
lib.rs