Skip to main content

hypha/visitor/
mod.rs

1//! Visitor module for tasting and resolving spores from the network.
2//!
3//! Resolution flow:
4//! 1. Parse CMN URI (cmn://domain/hash)
5//! 2. Get cmn.json (from cache or fetch)
6//! 3. Use endpoint template to build actual URL
7//! 4. Fetch and verify spore manifest
8//! 5. Verify signature against public key from cmn.json
9//! 6. Download content and verify hash matches URI
10
11use serde::Serialize;
12use serde_json::json;
13use std::path::Path;
14use std::process::ExitCode;
15
16use crate::api::Output;
17use crate::cache::{CacheDir, DomainCache, TasteVerdictCache};
18use substrate::{CmnEntry, CmnUri, PrettyJson};
19
20mod absorb;
21mod bond;
22mod crypto;
23mod distribution;
24pub(crate) mod extract;
25mod fetch;
26mod grow;
27mod lineage;
28mod search;
29mod sense;
30mod spawn;
31mod taste;
32mod verify;
33
34/// Structured error for archive extraction and file copy operations.
35#[derive(Debug, thiserror::Error)]
36pub enum ExtractError {
37    /// Content is actively dangerous (symlinks, path traversal, zip bombs).
38    /// Treated as an unverified delivery failure; callers clean up without
39    /// persisting a toxic taste verdict.
40    #[error("MALICIOUS: {0}")]
41    Malicious(String),
42    /// Local receive/cache policy rejected otherwise valid content.
43    #[error("{0}")]
44    PolicyRejected(String),
45    /// Non-malicious failure (I/O error, unsupported format, etc.).
46    #[error("{0}")]
47    Failed(String),
48}
49
50impl ExtractError {
51    pub fn is_malicious(&self) -> bool {
52        matches!(self, Self::Malicious(_))
53    }
54
55    pub fn is_policy_rejected(&self) -> bool {
56        matches!(self, Self::PolicyRejected(_))
57    }
58}
59
60impl From<String> for ExtractError {
61    fn from(s: String) -> Self {
62        Self::Failed(s)
63    }
64}
65
66impl From<substrate::archive::ExtractError> for ExtractError {
67    fn from(e: substrate::archive::ExtractError) -> Self {
68        match e {
69            substrate::archive::ExtractError::Malicious(msg) => Self::Malicious(msg),
70            substrate::archive::ExtractError::Failed(msg) => Self::Failed(msg),
71        }
72    }
73}
74
75// Re-export extract module items for internal use
76use extract::LimitedWriter;
77pub(crate) use extract::{
78    decode_delta_to_raw_tar_file, download_and_extract_to_dir, download_file,
79    ensure_no_rejected_path_components, load_old_archive_dictionary, rejected_path_component,
80    DeltaByteBudget, ExtractLimits,
81};
82
83// Re-export all public items so external callers don't break
84pub use absorb::{absorb, handle_absorb};
85pub use bond::{bond_fetch, handle_bond_fetch};
86pub use crypto::{
87    embedded_spore_author_key, fetch_spore_manifest, get_cmn_entry, verify_content_hash,
88    verify_manifest_two_key_signatures, verify_spore_with_key_trust,
89};
90use distribution::{
91    build_archive_delta_url_from_endpoint, build_archive_url_from_endpoint, dist_git_ref,
92    dist_git_url, dist_has_type, is_safe_bond_dir_name,
93};
94pub(crate) use fetch::fetch_spore_to_cache;
95use fetch::{clone_git_to_dir, fetch_bonds, fetch_cmn_json, fetch_opts};
96pub use grow::{grow, handle_grow};
97pub use lineage::{handle_lineage, lineage_in, lineage_out};
98pub use search::{handle_search, search, search_with_bond};
99pub use sense::{handle_sense, sense, sense_with_id};
100pub use spawn::{handle_spawn, spawn};
101pub use taste::{check_taste, check_taste_verdict_for_replicate, handle_taste, taste};
102pub(crate) use verify::fetch_verified_spore;
103use verify::{
104    can_synapse_fallback, mtime_epoch_ms, primary_capsule, resolve_default_synapse_url,
105    verify_downloaded_content, warn_remove_dir,
106};
107
108// Cross-submodule imports: these are brought into scope here so that
109// submodules using `use super::*` can access sibling module functions.
110use bond::bond_in_dir;
111use crypto::{verify_manifest_capsule_signature, verify_manifest_core_signature};
112use spawn::{cache_archive_raw_file, download_and_apply_delta, extract_archive};
113
114// URI parsing tests are in substrate/src/uri.rs
115
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {

    use super::*;

    /// Synapse URL on loopback port 1. Nothing is expected to listen there,
    /// so every request against it should fail quickly.
    const UNREACHABLE_SYNAPSE: &str = "http://127.0.0.1:1";

    /// Shorthand for `substrate::local_dir_name` with `input` as the only
    /// candidate name and `"spore"` as the fallback.
    fn sanitize_for_path(input: &str) -> String {
        substrate::local_dir_name(None, Some(input), "spore")
    }

    /// Checks each `(input, expected)` pair against `sanitize_for_path`.
    fn assert_sanitized(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_for_path(input), expected);
        }
    }

    #[test]
    fn test_sanitize_for_path_basic() {
        assert_sanitized(&[("cmn-spec", "cmn-spec"), ("my_project", "my_project")]);
    }

    #[test]
    fn test_sanitize_for_path_spaces() {
        assert_sanitized(&[
            ("CMN Protocol Specification", "CMN-Protocol-Specification"),
            ("a  b", "a--b"),
        ]);
    }

    #[test]
    fn test_sanitize_for_path_forbidden_chars() {
        assert_sanitized(&[("foo/bar", "foo-bar"), ("a:b*c?d", "a-b-c-d")]);
    }

    #[test]
    fn test_sanitize_for_path_unicode_preserved() {
        assert_sanitized(&[
            ("CMN协议规范", "CMN协议规范"),
            ("数据库工具", "数据库工具"),
            ("cafe\u{301}-utils", "cafe\u{301}-utils"),
        ]);
    }

    #[test]
    fn test_sanitize_for_path_empty_fallback() {
        assert_sanitized(&[("", "spore"), ("---", "spore")]);
    }

    #[test]
    fn test_sanitize_for_path_traversal_safe() {
        assert_sanitized(&[
            ("..", "spore"),
            (".", "spore"),
            ("../etc", "-etc"),
            (".git", "git"),
            (".cmn", "cmn"),
            ("...hidden", "hidden"),
        ]);
    }

    #[test]
    fn test_sanitize_for_path_control_chars() {
        assert_sanitized(&[
            ("foo\0bar", "foo-bar"),
            ("\x01\x02", "spore"),
            ("ok\x7f", "ok"),
        ]);
    }

    /// Builds a fully populated spore manifest whose `capsule.core.bonds`
    /// array is `bonds`; every other field is fixed filler.
    fn manifest_with_bonds(bonds: serde_json::Value) -> serde_json::Value {
        serde_json::json!({
            "$schema": "https://cmn.dev/schemas/v1/spore.json",
            "capsule": {
                "uri": "cmn://example.com/b3.child",
                "core": {
                    "name": "test",
                    "domain": "example.com",
                    "key": "ed25519.5XmkQ9vZP8nL",
                    "synopsis": "Test",
                    "intent": ["Testing"],
                    "license": "MIT",
                    "mutations": [],
                    "size_bytes": 512,
                    "updated_at_epoch_ms": 1700000000000_u64,
                    "bonds": bonds,
                    "tree": { "algorithm": "blob_tree_blake3_nfc", "exclude_names": [], "follow_rules": [] }
                },
                "core_signature": "sig",
                "dist": [{"type": "archive"}]
            },
            "capsule_signature": "sig"
        })
    }

    /// A `spawned_from` bond yields the hash part of its URI.
    #[test]
    fn test_spawned_from_hash_present() {
        let manifest = manifest_with_bonds(serde_json::json!([
            {"uri": "cmn://example.com/b3.3yMR7vZQ9hL", "relation": "spawned_from"}
        ]));
        let expected = Some(String::from("b3.3yMR7vZQ9hL"));
        assert_eq!(grow::spawned_from_hash(&manifest), expected);
    }

    /// Bonds with a different relation do not count as a spawn parent.
    #[test]
    fn test_spawned_from_hash_missing() {
        let manifest = manifest_with_bonds(serde_json::json!([
            {"uri": "cmn://example.com/b3.8cQnH4xPmZ2v", "relation": "depends_on"}
        ]));
        assert_eq!(grow::spawned_from_hash(&manifest), None);
    }

    /// A core without any `bonds` key yields `None`.
    #[test]
    fn test_spawned_from_hash_no_bonds() {
        let manifest = serde_json::json!({
            "$schema": "https://cmn.dev/schemas/v1/spore.json",
            "capsule": {
                "uri": "cmn://example.com/b3.child",
                "core": {
                    "name": "test",
                    "domain": "example.com",
                    "synopsis": "Test",
                    "intent": ["Testing"],
                    "license": "MIT"
                },
                "core_signature": "sig"
            },
            "capsule_signature": "sig"
        });
        assert_eq!(grow::spawned_from_hash(&manifest), None);
    }

    /// An empty JSON object yields `None`.
    #[test]
    fn test_spawned_from_hash_empty_manifest() {
        let manifest = serde_json::Value::Object(serde_json::Map::new());
        assert_eq!(grow::spawned_from_hash(&manifest), None);
    }

    /// HTTP client with a one-second timeout so failing requests stay cheap.
    fn test_client() -> reqwest::Client {
        let one_second = std::time::Duration::from_secs(1);
        reqwest::Client::builder()
            .timeout(one_second)
            .build()
            .unwrap()
    }

    /// `substrate::client::search` accepts a bond filter; the call is
    /// expected to fail because the endpoint is unreachable.
    #[tokio::test]
    async fn test_fetch_search_with_bond() {
        let client = test_client();
        let bond_filter = "spawned_from:cmn://d.dev/b3.3yMR7vZQ9hL";
        let outcome = substrate::client::search(
            &client,
            UNREACHABLE_SYNAPSE,
            "test",
            None,
            None,
            Some(bond_filter),
            5,
            Default::default(),
        )
        .await;
        assert!(outcome.is_err());
    }

    /// `substrate::client::search` also works with no bond filter at all.
    #[tokio::test]
    async fn test_fetch_search_without_bond() {
        let client = test_client();
        let outcome = substrate::client::search(
            &client,
            UNREACHABLE_SYNAPSE,
            "test",
            Some("cmn.dev"),
            Some("MIT"),
            None,
            10,
            Default::default(),
        )
        .await;
        assert!(outcome.is_err());
    }

    /// `substrate::client::search` accepts comma-separated bond filters.
    #[tokio::test]
    async fn test_fetch_search_with_multi_bond() {
        let client = test_client();
        let bond_filter =
            "spawned_from:cmn://a.dev/b3.3yMR7vZQ9hL,follows:cmn://b.dev/b3.8cQnH4xPmZ2v";
        let outcome = substrate::client::search(
            &client,
            UNREACHABLE_SYNAPSE,
            "tools",
            None,
            None,
            Some(bond_filter),
            20,
            Default::default(),
        )
        .await;
        assert!(outcome.is_err());
    }

    /// `search_with_bond` without a bond filter and plain `search` are both
    /// expected to error against the unreachable synapse.
    #[tokio::test]
    async fn test_search_with_bond_none_delegates() {
        let synapse = Some(UNREACHABLE_SYNAPSE);

        let via_bond_api =
            search_with_bond("test", synapse, None, None, None, None, 20, &crate::NoopSink).await;
        let via_plain_api = search("test", synapse, None, None, None, 20, &crate::NoopSink).await;

        assert!(via_bond_api.is_err());
        assert!(via_plain_api.is_err());
    }

    /// With a bond filter supplied, `search_with_bond` must still get as far
    /// as the HTTP request rather than failing while handling its arguments.
    #[tokio::test]
    async fn test_search_with_bond_passes_bond_through() {
        let outcome = search_with_bond(
            "http client",
            Some(UNREACHABLE_SYNAPSE),
            None,
            Some("cmn.dev"),
            Some("MIT"),
            Some("spawned_from:cmn://cmn.dev/b3.3yMR7vZQ9hL"),
            10,
            &crate::NoopSink,
        )
        .await;
        assert!(outcome.is_err());

        // The error text has to point at the synapse request, not bond parsing.
        let message = outcome.unwrap_err().to_string();
        assert!(
            message.contains("synapse_error"),
            "should fail at HTTP level: {}",
            message
        );
    }
}