Skip to main content

kellnr_docs/
doc_queue.rs

1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3
4use cargo::GlobalContext;
5use cargo::core::Workspace;
6use cargo::core::compiler::UserIntent;
7use cargo::core::resolver::CliFeatures;
8use cargo::ops::{self, CompileOptions, DocOptions, OutputFormat};
9use flate2::read::GzDecoder;
10use fs_extra::dir::{CopyOptions, copy};
11use kellnr_common::original_name::OriginalName;
12use kellnr_common::version::Version;
13use kellnr_db::{DbProvider, DocQueueEntry};
14use kellnr_storage::kellnr_crate_storage::KellnrCrateStorage;
15use tar::Archive;
16use tokio::fs::{create_dir_all, remove_dir_all};
17use tracing::error;
18
19use crate::compute_doc_url;
20use crate::docs_error::DocsError;
21
22pub fn doc_extraction_queue(
23    db: Arc<dyn DbProvider>,
24    cs: Arc<KellnrCrateStorage>,
25    docs_path: PathBuf,
26    path_prefix: String,
27    cratesio_index: Option<String>,
28) {
29    tokio::spawn(async move {
30        loop {
31            tokio::time::sleep(std::time::Duration::from_secs(10)).await;
32            if let Err(e) = inner_loop(
33                db.clone(),
34                &cs,
35                &docs_path,
36                &path_prefix,
37                cratesio_index.as_deref(),
38            )
39            .await
40            {
41                error!("Rustdoc generation loop failed: {e}");
42            }
43        }
44    });
45}
46
47async fn inner_loop(
48    db: Arc<dyn DbProvider>,
49    cs: &KellnrCrateStorage,
50    docs_path: &Path,
51    path_prefix: &str,
52    cratesio_index: Option<&str>,
53) -> Result<(), DocsError> {
54    let entries = db.get_doc_queue().await?;
55
56    for entry in entries {
57        if let Err(e) = extract_docs(&entry, cs, docs_path, cratesio_index).await {
58            error!("Failed to extract docs from crate: {e}");
59        } else {
60            if let Err(e) = clean_up(&entry.path).await {
61                error!("Failed to delete temporary rustdoc queue folder: {e}");
62            }
63
64            let version = Version::from_unchecked_str(&entry.version);
65            let docs_link = compute_doc_url(&entry.normalized_name, &version, path_prefix);
66            db.update_docs_link(&entry.normalized_name, &version, &docs_link)
67                .await?;
68        }
69        db.delete_doc_queue(entry.id).await?;
70    }
71
72    Ok(())
73}
74
75async fn extract_docs(
76    doc: &DocQueueEntry,
77    cs: &KellnrCrateStorage,
78    docs_path: &Path,
79    cratesio_index: Option<&str>,
80) -> Result<(), DocsError> {
81    // Unpack crate
82
83    // TODO: Only works if normalized name = original name -> Need to get original name from db
84    let orig_name = OriginalName::from_unchecked(doc.normalized_name.to_string());
85    let version = Version::from_unchecked_str(&doc.version);
86    let contents = cs.get(&orig_name, &version).await.ok_or_else(|| {
87        error!("Failed to get crate from storage");
88        DocsError::CrateDoesNotExist(doc.normalized_name.to_string(), doc.version.clone())
89    })?;
90    let tar = GzDecoder::new(std::io::Cursor::new(contents));
91    let mut archive = Archive::new(tar);
92    archive.unpack(&doc.path)?;
93
94    // Generate the docs
95    let generated_docs_path = &doc
96        .path
97        .join(format!("{}-{}", doc.normalized_name, doc.version));
98    strip_rust_toolchain_files(generated_docs_path).await?;
99    generate_docs(generated_docs_path, cratesio_index)?;
100
101    // Copy the docs directory
102    let from = generated_docs_path.join("target").join("doc");
103    let to = docs_path
104        .join(doc.normalized_name.to_string())
105        .join(&doc.version);
106    copy_dir(&from, &to).await?;
107
108    Ok(())
109}
110
111async fn clean_up(path: &Path) -> Result<(), DocsError> {
112    remove_dir_all(path).await?;
113    Ok(())
114}
115
116/// Remove any `rust-toolchain.toml` (or legacy `rust-toolchain`) at the crate
117/// root before invoking cargo.
118///
119/// These files are a rustup feature for pinning a local development or CI
120/// toolchain. They were never intended as a contract with downstream
121/// consumers: the canonical way to declare a minimum supported Rust version
122/// is `package.rust-version` in `Cargo.toml`, which cargo's resolver honors
123/// without swapping out the compiler.
124///
125/// However, cargo does not exclude these files from `cargo publish` by
126/// default, so some crates accidentally ship them. When that happens, the
127/// rustup proxy that backs `rustc` inside the Kellnr container walks up from
128/// the working directory, finds the file, and silently switches to the
129/// pinned toolchain (downloading it if necessary). If the pin predates a
130/// stable feature the crate now relies on (e.g. `check-cfg`), `cargo doc`
131/// fails with errors that look like bugs in the crate or in Kellnr but are
132/// really an invisible toolchain swap. See issue #1176.
133///
134/// Dropping the file here only covers the crate currently being documented.
135/// Transitive dependencies that ship the same accident are unpacked by cargo
136/// into its own registry cache and need `RUSTUP_TOOLCHAIN` set in the
137/// container environment to neutralize.
138async fn strip_rust_toolchain_files(crate_path: &Path) -> Result<(), DocsError> {
139    for name in ["rust-toolchain.toml", "rust-toolchain"] {
140        let path = crate_path.join(name);
141        match tokio::fs::remove_file(&path).await {
142            Ok(()) => {}
143            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
144            Err(e) => return Err(e.into()),
145        }
146    }
147    Ok(())
148}
149
150async fn copy_dir(from: &Path, to: &Path) -> Result<(), DocsError> {
151    create_dir_all(to).await?;
152    copy(
153        from,
154        to,
155        &CopyOptions {
156            overwrite: true,
157            ..CopyOptions::default()
158        },
159    )?;
160    Ok(())
161}
162
163/// Build the cargo [`GlobalContext`] used to document a crate.
164///
165/// When a custom crates.io proxy index is configured (`cratesio_index`), point
166/// cargo's `crates-io` source at it via source replacement so dependency
167/// resolution for `cargo doc` honors `proxy.index` instead of fetching from the
168/// upstream `index.crates.io`. See issue #1185.
169fn build_doc_context(cratesio_index: Option<&str>) -> Result<GlobalContext, DocsError> {
170    let mut ctx = GlobalContext::default().map_err(|e| DocsError::CargoError(e.to_string()))?;
171
172    if let Some(index) = cratesio_index {
173        let cli_config = vec![
174            "source.crates-io.replace-with=\"kellnr-proxy\"".to_string(),
175            format!("source.kellnr-proxy.registry=\"sparse+{index}\""),
176        ];
177        ctx.configure(0, false, None, false, false, false, &None, &[], &cli_config)
178            .map_err(|e| DocsError::CargoError(e.to_string()))?;
179    }
180
181    Ok(ctx)
182}
183
184fn generate_docs(
185    crate_path: impl AsRef<Path>,
186    cratesio_index: Option<&str>,
187) -> Result<(), DocsError> {
188    let manifest_path = crate_path.as_ref().join("Cargo.toml").canonicalize()?;
189    let ctx = build_doc_context(cratesio_index)?;
190    let workspace =
191        Workspace::new(&manifest_path, &ctx).map_err(|e| DocsError::CargoError(e.to_string()))?;
192    let compile_opts = CompileOptions {
193        cli_features: CliFeatures::new_all(true),
194        ..CompileOptions::new(
195            &ctx,
196            UserIntent::Doc {
197                deps: false,
198                json: false,
199            },
200        )
201        .map_err(|e| DocsError::CargoError(e.to_string()))?
202    };
203    let options = DocOptions {
204        open_result: false,
205        compile_opts,
206        output_format: OutputFormat::Html,
207    };
208    ops::doc(&workspace, &options).map_err(|e| DocsError::CargoError(e.to_string()))?;
209    Ok(())
210}
211
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use cargo::core::SourceId;
    use cargo::sources::SourceConfigMap;

    use super::*;

    /// Without an index override no `kellnr-proxy` source is defined.
    #[test]
    fn no_index_override_does_not_inject_proxy_source() {
        let ctx = build_doc_context(None).unwrap();

        // Only the proxy source is checked. The value of
        // `crates-io.replace-with` depends on the environment: cargo keeps
        // whatever crates.io source is ambient (the upstream index.crates.io
        // in the kellnr container), and a developer's ~/.cargo/config.toml
        // may itself replace it.
        let proxy_registry = ctx.get_string("source.kellnr-proxy.registry").unwrap();
        assert!(proxy_registry.is_none());
    }

    /// With an index override cargo resolves crates.io to the proxy index.
    #[test]
    fn index_override_replaces_crates_io_source_with_proxy() {
        let ctx = build_doc_context(Some("https://rsproxy.cn/index/")).unwrap();

        // The CLI config overrides are well-formed and were applied by cargo.
        let replace_with = ctx
            .get_string("source.crates-io.replace-with")
            .unwrap()
            .map(|entry| entry.val);
        assert_eq!(replace_with.as_deref(), Some("kellnr-proxy"));

        // Cargo's own source resolution must map the crates.io source to the
        // configured proxy index rather than index.crates.io. See issue #1185.
        let sources = SourceConfigMap::new(&ctx).unwrap();
        let crates_io = SourceId::crates_io(&ctx).unwrap();
        let no_yanked_whitelist = HashSet::new();
        let source = sources.load(crates_io, &no_yanked_whitelist).unwrap();
        let replaced = source.replaced_source_id();

        assert!(!replaced.is_crates_io());
        assert!(
            replaced.url().as_str().contains("rsproxy.cn/index/"),
            "expected replacement source to point at the proxy index, got {}",
            replaced.url()
        );
    }

    /// Both the TOML and the legacy pin file are deleted.
    #[tokio::test]
    async fn strip_rust_toolchain_files_removes_both_variants() {
        let crate_root = tempfile::tempdir().unwrap();
        let toml_pin = crate_root.path().join("rust-toolchain.toml");
        let legacy_pin = crate_root.path().join("rust-toolchain");

        tokio::fs::write(&toml_pin, "[toolchain]\nchannel = \"1.65.0\"\n")
            .await
            .unwrap();
        tokio::fs::write(&legacy_pin, "1.65.0\n").await.unwrap();

        strip_rust_toolchain_files(crate_root.path()).await.unwrap();

        assert!(!toml_pin.exists());
        assert!(!legacy_pin.exists());
    }

    /// A crate without pin files is accepted and left as it was.
    #[tokio::test]
    async fn strip_rust_toolchain_files_noop_when_absent() {
        let crate_root = tempfile::tempdir().unwrap();
        let manifest = crate_root.path().join("Cargo.toml");

        tokio::fs::write(&manifest, "[package]\nname = \"x\"\n")
            .await
            .unwrap();

        strip_rust_toolchain_files(crate_root.path()).await.unwrap();

        assert!(manifest.exists());
    }

    /// Only the pin file disappears; other files and directories survive.
    #[tokio::test]
    async fn strip_rust_toolchain_files_only_removes_named_files() {
        let crate_root = tempfile::tempdir().unwrap();
        let toml_pin = crate_root.path().join("rust-toolchain.toml");
        let manifest = crate_root.path().join("Cargo.toml");
        let src_dir = crate_root.path().join("src");

        tokio::fs::write(&toml_pin, "").await.unwrap();
        tokio::fs::write(&manifest, "").await.unwrap();
        tokio::fs::create_dir(&src_dir).await.unwrap();

        strip_rust_toolchain_files(crate_root.path()).await.unwrap();

        assert!(!toml_pin.exists());
        assert!(manifest.exists());
        assert!(src_dir.exists());
    }
}