Skip to main content

rho_core/
lib.rs

1// Lets the command modules (which refer to crate items as `rho_core::...`) compile
2// both here in the library and in the `rho` binary.
3extern crate self as rho_core;
4
5use std::ffi::OsStr;
6use std::fs::{self, File};
7use std::io::{self, Read};
8use std::path::{Path, PathBuf};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13
/// Crate-wide result alias: the error side is any boxed `std::error::Error`,
/// so string messages and library errors can both be returned with `?` / `.into()`.
pub type RhoResult<T> = Result<T, Box<dyn std::error::Error>>;
15
16pub mod commands;
17
18pub mod providers {
19    use crate::RhoResult;
20    use std::path::Path;
21    use std::process::Command;
22
23    pub mod github {
24        use super::{Command, Path, RhoResult};
25
        /// Extracts an `owner/repo` slug from a git remote, if the remote is recognized
        /// as pointing at GitHub.
        ///
        /// Thin wrapper around [`repo_candidate_from_remote_with_host_resolver`] that
        /// resolves non-literal SSH hosts with `ssh_host_is_github`, which runs `ssh -G`.
        pub fn repo_candidate_from_remote(remote: &str) -> Option<String> {
            repo_candidate_from_remote_with_host_resolver(remote, ssh_host_is_github)
        }
29
30        pub fn repo_candidate_from_remote_with_host_resolver<F>(
31            remote: &str,
32            host_is_github: F,
33        ) -> Option<String>
34        where
35            F: Fn(&str) -> bool,
36        {
37            if let Some(path) = remote.strip_prefix("https://github.com/") {
38                return slug_from_remote_path(path);
39            }
40            if let Some(path) = remote.strip_prefix("git@github.com:") {
41                return slug_from_remote_path(path);
42            }
43            if let Some(rest) = remote.strip_prefix("git@")
44                && let Some((host, path)) = rest.split_once(':')
45                && (host == "github.com" || host_is_github(host))
46            {
47                return slug_from_remote_path(path);
48            }
49            if let Some(rest) = remote.strip_prefix("ssh://git@")
50                && let Some((host, path)) = rest.split_once('/')
51                && (host == "github.com" || host_is_github(host))
52            {
53                return slug_from_remote_path(path);
54            }
55            None
56        }
57
58        fn slug_from_remote_path(path: &str) -> Option<String> {
59            let path = path.trim_end_matches(".git").trim_matches('/');
60            let mut parts = path.split('/');
61            let owner = parts.next()?;
62            let repo = parts.next()?;
63            if parts.next().is_some() || owner.is_empty() || repo.is_empty() {
64                return None;
65            }
66            Some(format!("{owner}/{repo}"))
67        }
68
69        fn ssh_host_is_github(host: &str) -> bool {
70            let output = Command::new("ssh").args(["-G", host]).output();
71            let Ok(output) = output else {
72                return false;
73            };
74            if !output.status.success() {
75                return false;
76            }
77            let config = String::from_utf8_lossy(&output.stdout);
78            config.lines().any(|line| {
79                let mut fields = line.split_whitespace();
80                matches!(
81                    (fields.next(), fields.next(), fields.next()),
82                    (Some("hostname"), Some("github.com"), None)
83                )
84            })
85        }
86
87        pub fn create_pull_request(
88            root: &Path,
89            title: &str,
90            body: &str,
91            open_browser: bool,
92        ) -> RhoResult<String> {
93            let existing = Command::new("gh")
94                .current_dir(root)
95                .args(["pr", "view", "--json", "url", "--jq", ".url"])
96                .output();
97            if let Ok(existing) = existing
98                && existing.status.success()
99            {
100                let url = String::from_utf8(existing.stdout)?.trim().to_string();
101                if !url.is_empty() {
102                    return Ok(url);
103                }
104            }
105            let mut command = Command::new("gh");
106            command
107                .current_dir(root)
108                .args(["pr", "create", "--title", title, "--body", body]);
109            if open_browser {
110                command.arg("--web");
111            }
112            let output = command.output()?;
113            if !output.status.success() {
114                let stderr = String::from_utf8_lossy(&output.stderr);
115                return Err(format!("gh pr create failed: {}", stderr.trim()).into());
116            }
117            Ok(String::from_utf8(output.stdout)?.trim().to_string())
118        }
119    }
120}
121
/// Versioned top-level document that wraps one [`RunRequest`] under the `request` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RequestManifest {
    pub version: u32,
    pub request: RunRequest,
}
127
/// Payload of a [`RequestManifest`]; every field is required when deserializing.
///
/// NOTE(review): field meanings are inferred from their names only (e.g. `from`/`to`
/// as actor ids, `requested_tier` as one of the tiers accepted by `validate_tier`,
/// `created_at` as a timestamp string) — confirm against the code that builds requests.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RunRequest {
    pub id: String,
    pub from: String,
    pub to: String,
    pub tool_id: String,
    pub dataset_uuid: String,
    pub code_paths: Vec<String>,
    pub code_sha256: String,
    pub command: Vec<String>,
    pub requested_tier: String,
    pub created_at: String,
}
141
/// Versioned top-level document that wraps one [`Approval`] under the `approval` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ApprovalManifest {
    pub version: u32,
    pub approval: Approval,
}
147
/// Payload of an [`ApprovalManifest`]; every field is a required string.
///
/// NOTE(review): the allowed values of `decision` are not defined in this type —
/// confirm against the code that writes approvals.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Approval {
    pub request_id: String,
    pub decision: String,
    pub approver: String,
    pub note: String,
    pub created_at: String,
}
156
/// Versioned top-level document that wraps one [`RunRecord`] under the `run` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RunManifest {
    pub version: u32,
    pub run: RunRecord,
}
162
/// Payload of a [`RunManifest`].
///
/// The `Option` fields (`runner`, `dataset_csv`, `code_path`, `code_sha256`,
/// `exit_code`, `error`) are left out of the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RunRecord {
    pub id: String,
    pub request_id: String,
    pub status: String,
    pub tier: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runner: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dataset_csv: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_sha256: Option<String>,
    pub command: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub exit_code: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    pub stdout_path: String,
    pub stderr_path: String,
    pub created_at: String,
}
186
/// Versioned top-level document that wraps one [`SandboxRunRecord`] under the
/// `sandbox_run` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxRunManifest {
    pub version: u32,
    pub sandbox_run: SandboxRunRecord,
}
192
/// Payload of a [`SandboxRunManifest`]: a run description together with its
/// [`SandboxMount`] list and [`SandboxNetworkPolicy`]. All fields are required.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxRunRecord {
    pub id: String,
    pub request_id: String,
    pub runner: String,
    pub tier: String,
    pub dataset_csv: String,
    pub code_path: String,
    pub command: Vec<String>,
    pub artifact_dir: String,
    pub stdout_path: String,
    pub stderr_path: String,
    pub mounts: Vec<SandboxMount>,
    pub network: SandboxNetworkPolicy,
    pub created_at: String,
}
209
/// One entry of [`SandboxRunRecord::mounts`]: a host path, a guest path and a mode.
///
/// NOTE(review): the accepted `mode` strings are not constrained here — confirm
/// against the sandbox runner.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxMount {
    pub host_path: String,
    pub guest_path: String,
    pub mode: String,
}
216
/// Network settings recorded in a [`SandboxRunRecord`].
///
/// `allow_hosts` and `tcp_maps` default to empty lists when their keys are missing
/// from the input; `default_deny` is required.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxNetworkPolicy {
    pub default_deny: bool,
    #[serde(default)]
    pub allow_hosts: Vec<String>,
    #[serde(default)]
    pub tcp_maps: Vec<String>,
}
225
/// Versioned top-level document that carries a `kind` string next to one
/// [`ControlledAction`] under the `action` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ControlledActionManifest {
    pub version: u32,
    pub kind: String,
    pub action: ControlledAction,
}
232
/// Payload of a [`ControlledActionManifest`].
///
/// `input_path` and `script_path` are optional: they default to `None` when
/// missing from the input and are left out of the serialized form when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ControlledAction {
    pub action_id: String,
    pub request_id: String,
    pub tool_id: String,
    pub requested_by: String,
    pub requested_for: String,
    pub action_type: String,
    pub summary: String,
    pub reason: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_path: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub script_path: Option<String>,
    pub output_path: String,
}
249
/// Versioned top-level document that wraps one [`ProposedAction`] under the
/// `proposed_action` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProposedActionManifest {
    pub version: u32,
    pub proposed_action: ProposedAction,
}
255
/// Payload of a [`ProposedActionManifest`]; every field is a required string.
///
/// Unlike [`ControlledAction`], `script_path` is mandatory here and there is no
/// `input_path` field.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProposedAction {
    pub action_id: String,
    pub request_id: String,
    pub tool_id: String,
    pub requested_by: String,
    pub requested_for: String,
    pub action_type: String,
    pub script_path: String,
    pub output_path: String,
    pub summary: String,
    pub reason: String,
}
269
/// Versioned top-level document that wraps one [`Tool`] under the `tool` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolManifest {
    pub version: u32,
    pub tool: Tool,
}
275
/// Payload of a [`ToolManifest`].
///
/// `command_template` defaults to an empty list when missing from the input and is
/// left out of the serialized form when empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Tool {
    pub id: String,
    pub action_type: String,
    pub owner: String,
    pub approval_required: bool,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub command_template: Vec<String>,
}
285
/// Versioned top-level document that wraps one [`ActionGrant`] under the
/// `action_grant` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ActionGrantManifest {
    pub version: u32,
    pub action_grant: ActionGrant,
}
291
/// Payload of an [`ActionGrantManifest`]: identifying strings plus the granted
/// action file ([`GrantedActionFile`]), repository state ([`GrantedRepoState`])
/// and a list of [`GrantedInput`]s.
///
/// `inputs` defaults to an empty list when its key is missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ActionGrant {
    pub action_id: String,
    pub request_id: String,
    pub tool_id: String,
    pub action_type: String,
    pub decision: String,
    pub granted_by: String,
    pub created_at: String,
    pub action: GrantedActionFile,
    pub repo: GrantedRepoState,
    #[serde(default)]
    pub inputs: Vec<GrantedInput>,
}
306
/// A file reference stored in [`ActionGrant::action`]: a path and a `sha256` string.
///
/// NOTE(review): presumably `sha256` is the hex digest of the file at `path`
/// (cf. `file_digest`) — confirm against the code that issues grants.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GrantedActionFile {
    pub path: String,
    pub sha256: String,
}
312
/// Repository state stored in [`ActionGrant::repo`].
///
/// `git_commit` is left out of the serialized form when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GrantedRepoState {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_commit: Option<String>,
}
318
/// One entry of [`ActionGrant::inputs`]: a `kind` label, a path and a `sha256` string.
///
/// NOTE(review): the set of `kind` values is not defined in this type — confirm
/// against the code that issues grants.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GrantedInput {
    pub kind: String,
    pub path: String,
    pub sha256: String,
}
325
/// Versioned top-level document that wraps one [`ControlledActionStatus`] under the
/// `status` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ControlledActionStatusManifest {
    pub version: u32,
    pub status: ControlledActionStatus,
}
331
/// Payload of a [`ControlledActionStatusManifest`].
///
/// `run_id`, `stdout_path` and `error` are left out of the serialized form when
/// they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ControlledActionStatus {
    pub action_id: String,
    pub request_id: String,
    pub status: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stdout_path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    pub created_at: String,
}
345
/// Versioned top-level document that wraps one [`IdentityBundle`] under the
/// `identity` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IdentityBundleManifest {
    pub version: u32,
    pub identity: IdentityBundle,
}
351
/// An identity description: identifying strings plus lists of
/// [`IdentityPublicKey`]s and [`IdentityProof`]s. All fields are required.
///
/// NOTE(review): presumably `id` uses the `rho://id/...` form produced by
/// `normalize_actor_id` — confirm against the code that creates identities.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IdentityBundle {
    pub id: String,
    pub kind: String,
    pub handle: String,
    pub display_name: String,
    pub public_keys: Vec<IdentityPublicKey>,
    pub proofs: Vec<IdentityProof>,
    pub created_at: String,
}
362
/// One entry of [`IdentityBundle::public_keys`]; every field is a required string.
///
/// NOTE(review): the encoding of `public_key` and the format of `fingerprint` are
/// not visible in this type — confirm against the key-generation code.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IdentityPublicKey {
    pub id: String,
    pub kind: String,
    pub algorithm: String,
    pub public_key: String,
    pub fingerprint: String,
    pub created_at: String,
}
372
/// One entry of [`IdentityBundle::proofs`].
///
/// `kind` and `proof_url` are required; `provider_url`, `claim` and `verified_at`
/// are left out of the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IdentityProof {
    pub kind: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub claim: Option<String>,
    pub proof_url: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub verified_at: Option<String>,
}
384
/// Versioned top-level document that wraps one [`LocalIdentity`] under the
/// `local_identity` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocalIdentityManifest {
    pub version: u32,
    pub local_identity: LocalIdentity,
}
390
/// Payload of a [`LocalIdentityManifest`]: an [`IdentityBundle`] together with
/// references to its local key material.
///
/// `encryption_key` and `git` are optional: they default to `None` when missing
/// from the input and are left out of the serialized form when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocalIdentity {
    pub identity: IdentityBundle,
    pub signing_key: LocalSigningKey,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub encryption_key: Option<LocalEncryptionKey>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git: Option<LocalGitIdentity>,
}
400
/// Git-related details stored in [`LocalIdentity::git`]: a GitHub login plus a
/// commit name and commit email. All fields are required strings.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocalGitIdentity {
    pub github_login: String,
    pub commit_name: String,
    pub commit_email: String,
}
407
/// Signing-key description stored in [`LocalIdentity::signing_key`].
///
/// Holds the path of the public key and a [`PrivateKeyRef`] pointing at the
/// private key rather than the key material itself.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocalSigningKey {
    pub kind: String,
    pub algorithm: String,
    pub public_key_path: String,
    pub private_key_ref: PrivateKeyRef,
}
415
/// Encryption-key description stored in [`LocalIdentity::encryption_key`].
///
/// Has the same shape as [`LocalSigningKey`]: a public-key path and a
/// [`PrivateKeyRef`] pointing at the private key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocalEncryptionKey {
    pub kind: String,
    pub algorithm: String,
    pub public_key_path: String,
    pub private_key_ref: PrivateKeyRef,
}
423
/// Reference to a private key: a `backend` name and a `path`.
///
/// NOTE(review): the supported `backend` values and how `path` is interpreted for
/// each are not visible here — confirm against the key-loading code.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PrivateKeyRef {
    pub backend: String,
    pub path: String,
}
429
/// Versioned top-level document that wraps one [`TrustRecord`] under the `trust` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TrustManifest {
    pub version: u32,
    pub trust: TrustRecord,
}
435
/// Payload of a [`TrustManifest`]; every field is a required string.
///
/// NOTE(review): the allowed values of `decision` and `source` are not defined in
/// this type — confirm against the trust commands.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TrustRecord {
    pub identity_id: String,
    pub decision: String,
    pub trusted_at: String,
    pub source: String,
}
443
/// Versioned top-level document that wraps one [`SignatureRecord`] under the
/// `signature` key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignatureManifest {
    pub version: u32,
    pub signature: SignatureRecord,
}
449
/// Payload of a [`SignatureManifest`].
///
/// `context` is optional: it defaults to `None` when missing from the input and is
/// left out of the serialized form when `None`. All other fields are required.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignatureRecord {
    pub signed_path: String,
    pub signed_sha256: String,
    pub signer: String,
    pub key_id: String,
    pub algorithm: String,
    pub namespace: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub context: Option<SignatureContext>,
    pub signature: String,
    pub created_at: String,
}
463
/// Optional context attached to a [`SignatureRecord`].
///
/// Every field is optional: each defaults to `None` when missing from the input
/// and is left out of the serialized form when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignatureContext {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub request_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub message_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub recipient_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub purpose: Option<String>,
}
477
/// Serializes `value` to a YAML string using `serde_yaml`.
///
/// # Errors
/// Returns the boxed `serde_yaml` error when serialization fails.
pub fn to_yaml<T: Serialize>(value: &T) -> RhoResult<String> {
    Ok(serde_yaml::to_string(value)?)
}
481
/// Deserializes a `T` from YAML text using `serde_yaml`.
///
/// # Errors
/// Returns the boxed `serde_yaml` error when `value` cannot be parsed as a `T`.
pub fn from_yaml<T: for<'de> Deserialize<'de>>(value: &str) -> RhoResult<T> {
    Ok(serde_yaml::from_str(value)?)
}
485
486pub fn yaml_top_level_kind(value: &str) -> Option<String> {
487    let parsed: serde_yaml::Value = serde_yaml::from_str(value).ok()?;
488    let mapping = parsed.as_mapping()?;
489    mapping
490        .get(serde_yaml::Value::String("kind".to_string()))?
491        .as_str()
492        .map(ToOwned::to_owned)
493}
494
495pub fn is_rho_encrypted_text(value: &str) -> bool {
496    matches!(
497        yaml_top_level_kind(value).as_deref(),
498        Some("rho_recipient_envelope" | "rho_transparent_file")
499    )
500}
501
/// Serializes `value` to pretty-printed JSON using `serde_json`, with a trailing
/// newline appended.
///
/// # Errors
/// Returns the boxed `serde_json` error when serialization fails.
pub fn to_json_pretty<T: Serialize>(value: &T) -> RhoResult<String> {
    Ok(serde_json::to_string_pretty(value)? + "\n")
}
505
/// Deserializes a `T` from JSON text using `serde_json`.
///
/// # Errors
/// Returns the boxed `serde_json` error when `value` cannot be parsed as a `T`.
pub fn from_json<T: for<'de> Deserialize<'de>>(value: &str) -> RhoResult<T> {
    Ok(serde_json::from_str(value)?)
}
509
510pub fn validate_actor_id(value: &str) -> RhoResult<()> {
511    if let Some(handle) = value.strip_prefix("rho://id/github/") {
512        validate_simple_id(handle, "github identity handle", true)?;
513        return Ok(());
514    }
515    validate_simple_id(value, "actor id", true)
516}
517
518pub fn normalize_actor_id(value: &str) -> RhoResult<String> {
519    if value.starts_with("rho://id/") {
520        validate_actor_id(value)?;
521        return Ok(value.to_string());
522    }
523    if let Some(handle) = value.strip_prefix("github/") {
524        validate_simple_id(handle, "github identity handle", true)?;
525        return Ok(format!("rho://id/github/{handle}"));
526    }
527    validate_simple_id(value, "github identity handle", true)?;
528    Ok(format!("rho://id/github/{value}"))
529}
530
531pub fn normalize_repo_id(value: &str) -> RhoResult<String> {
532    if value.starts_with("rho://repo/") {
533        validate_relative_safe_path(value.trim_start_matches("rho://repo/"))?;
534        return Ok(value.to_string());
535    }
536    let path = value
537        .strip_prefix("https://github.com/")
538        .or_else(|| value.strip_prefix("git@github.com:"))
539        .unwrap_or(value)
540        .trim_end_matches(".git")
541        .trim_matches('/');
542    let parts: Vec<&str> = path.split('/').collect();
543    if parts.len() != 2 {
544        return Err(format!(
545            "repo id must be rho://repo/..., https://github.com/owner/repo, or owner/repo: {value}"
546        )
547        .into());
548    }
549    validate_simple_id(parts[0], "github owner", true)?;
550    validate_simple_id(parts[1], "github repo", true)?;
551    Ok(format!("rho://repo/github/{}/{}", parts[0], parts[1]))
552}
553
554pub fn validate_request_id(value: &str) -> RhoResult<()> {
555    if !value.starts_with("req-") {
556        return Err(format!("request id must start with req-: {value}").into());
557    }
558    validate_simple_id(value, "request id", true)
559}
560
561pub fn validate_run_id(value: &str) -> RhoResult<()> {
562    if !value.starts_with("run-") {
563        return Err(format!("run id must start with run-: {value}").into());
564    }
565    validate_simple_id(value, "run id", true)
566}
567
568pub fn validate_action_id(value: &str) -> RhoResult<()> {
569    if !value.starts_with("act-") {
570        return Err(format!("action id must start with act-: {value}").into());
571    }
572    validate_simple_id(value, "action id", true)
573}
574
/// Validates a tool id as a simple id (hyphens allowed); no prefix is required.
///
/// # Errors
/// Returns the `validate_simple_id` error labelled `tool id`.
pub fn validate_tool_id(value: &str) -> RhoResult<()> {
    validate_simple_id(value, "tool id", true)
}
578
579pub fn validate_action_type(value: &str) -> RhoResult<()> {
580    match value {
581        "run_mock_data" | "run_real_data" | "release_results" => Ok(()),
582        _ => Err(format!("unsupported action type: {value}").into()),
583    }
584}
585
586pub fn validate_tier(value: &str) -> RhoResult<()> {
587    match value {
588        "public" | "mock" | "real" => Ok(()),
589        _ => Err(format!("unsupported tier: {value}").into()),
590    }
591}
592
593pub fn validate_relative_safe_path(value: &str) -> RhoResult<()> {
594    if value.is_empty() || value.contains('\0') {
595        return Err("path must be non-empty and contain no NUL bytes".into());
596    }
597    let path = Path::new(value);
598    for component in path.components() {
599        match component {
600            std::path::Component::ParentDir => {
601                return Err(format!("path must not contain ..: {value}").into());
602            }
603            std::path::Component::RootDir | std::path::Component::Prefix(_) => {
604                return Err(format!("path must be relative: {value}").into());
605            }
606            _ => {}
607        }
608    }
609    Ok(())
610}
611
612pub fn path_matches_pattern(path: &str, pattern: &str) -> bool {
613    let path = normalize_match_path(path);
614    let pattern = normalize_match_path(pattern);
615    if path.is_empty() || pattern.is_empty() {
616        return path == pattern;
617    }
618    let path_segments = path.split('/').collect::<Vec<_>>();
619    let pattern_segments = pattern.split('/').collect::<Vec<_>>();
620    match_path_segments(&path_segments, &pattern_segments)
621}
622
/// Normalizes a path or pattern for matching: backslashes become `/`, any leading
/// `./` prefixes are dropped, and leading/trailing slashes are removed.
fn normalize_match_path(value: &str) -> String {
    let forward_slashed = value.replace('\\', "/");
    let without_dot_prefix = forward_slashed.trim_start_matches("./");
    without_dot_prefix.trim_matches('/').to_string()
}
630
631fn match_path_segments(path: &[&str], pattern: &[&str]) -> bool {
632    match pattern.split_first() {
633        None => path.is_empty(),
634        Some((first, rest)) if *first == "**" => {
635            match_path_segments(path, rest)
636                || (!path.is_empty() && match_path_segments(&path[1..], pattern))
637        }
638        Some((first, rest)) => {
639            let Some((path_first, path_rest)) = path.split_first() else {
640                return false;
641            };
642            segment_matches(path_first, first) && match_path_segments(path_rest, rest)
643        }
644    }
645}
646
/// Matches one path segment against one pattern segment, byte by byte, where `*`
/// in the pattern stands for any (possibly empty) run of bytes.
///
/// On a mismatch the matcher returns to the most recent `*` and lets it absorb one
/// more byte of `value`; without a previous `*` the match fails. Once `value` is
/// consumed, whatever is left of the pattern must consist solely of `*`.
fn segment_matches(value: &str, pattern: &str) -> bool {
    let text = value.as_bytes();
    let glob = pattern.as_bytes();
    let mut text_pos = 0usize;
    let mut glob_pos = 0usize;
    // Most recent `*`: (its index in `glob`, the text position it absorbs up to).
    let mut backtrack: Option<(usize, usize)> = None;

    while text_pos < text.len() {
        match glob.get(glob_pos).copied() {
            Some(b'*') => {
                backtrack = Some((glob_pos, text_pos));
                glob_pos += 1;
            }
            Some(literal) if literal == text[text_pos] => {
                text_pos += 1;
                glob_pos += 1;
            }
            _ => match backtrack {
                Some((star, absorbed_until)) => {
                    let resume = absorbed_until + 1;
                    backtrack = Some((star, resume));
                    glob_pos = star + 1;
                    text_pos = resume;
                }
                None => return false,
            },
        }
    }

    glob[glob_pos..].iter().all(|&byte| byte == b'*')
}
677
678fn validate_simple_id(value: &str, label: &str, allow_hyphen: bool) -> RhoResult<()> {
679    if value.is_empty() || value.len() > 96 {
680        return Err(format!("{label} must be 1-96 characters").into());
681    }
682    let valid = value.chars().all(|ch| {
683        ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_' || (allow_hyphen && ch == '-')
684    });
685    if !valid {
686        return Err(format!("{label} has invalid characters: {value}").into());
687    }
688    Ok(())
689}
690
/// Returns the argument that immediately follows the first occurrence of `flag`.
///
/// Yields `None` when `flag` is absent or is the last argument. The following
/// argument is returned as-is, even if it looks like another flag.
pub fn arg_value(args: &[String], flag: &str) -> Option<String> {
    args.iter()
        .zip(args.iter().skip(1))
        .find_map(|(name, value)| (name.as_str() == flag).then(|| value.clone()))
}
696
/// Reports whether any argument is exactly equal to `flag`.
pub fn has_flag(args: &[String], flag: &str) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|candidate| candidate == flag)
}
700
701pub fn require_arg(args: &[String], flag: &str) -> RhoResult<String> {
702    arg_value(args, flag).ok_or_else(|| format!("missing required argument: {flag}").into())
703}
704
/// Creates the parent directory of `path` (and any missing ancestors).
///
/// Does nothing when `path` has no parent. `path` itself is not created.
///
/// # Errors
/// Returns the I/O error from `fs::create_dir_all`.
pub fn ensure_parent(path: &Path) -> io::Result<()> {
    match path.parent() {
        Some(parent) => fs::create_dir_all(parent),
        None => Ok(()),
    }
}
711
/// Recursively copies the directory `source` into `target`.
///
/// `target` (and missing ancestors) is created first. Regular files are copied
/// with `fs::copy` and sub-directories are descended into; entries that are
/// neither (for example symlinks) are skipped.
///
/// # Errors
/// Returns the first I/O error encountered; entries copied before it stay in place.
pub fn copy_dir_recursive(source: &Path, target: &Path) -> io::Result<()> {
    fs::create_dir_all(target)?;
    for entry in fs::read_dir(source)? {
        let entry = entry?;
        let origin = entry.path();
        let destination = target.join(entry.file_name());
        let kind = entry.file_type()?;
        if kind.is_file() {
            fs::copy(&origin, &destination)?;
        } else if kind.is_dir() {
            copy_dir_recursive(&origin, &destination)?;
        }
    }
    Ok(())
}
727
/// Removes the directory tree at `path` when `path` exists; otherwise does nothing.
///
/// # Errors
/// Returns the I/O error from `fs::remove_dir_all`.
pub fn remove_dir_if_exists(path: &Path) -> io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    fs::remove_dir_all(path)
}
734
/// Wraps `value` in double quotes for embedding in YAML text.
///
/// Backslashes and double quotes are backslash-escaped, newlines become the two
/// characters `\n`, and carriage returns are dropped. All other characters are
/// copied unchanged.
pub fn yaml_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            '\r' => {}
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}
743
744pub fn now_rfc3339() -> String {
745    let seconds = SystemTime::now()
746        .duration_since(UNIX_EPOCH)
747        .unwrap_or_default()
748        .as_secs();
749    format_unix_seconds_rfc3339(seconds)
750}
751
752pub fn uuid_like() -> String {
753    let mut bytes = [0u8; 16];
754    getrandom::getrandom(&mut bytes).expect("secure random UUID generation failed");
755    bytes[6] = (bytes[6] & 0x0f) | 0x40;
756    bytes[8] = (bytes[8] & 0x3f) | 0x80;
757    format!(
758        "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
759        bytes[0],
760        bytes[1],
761        bytes[2],
762        bytes[3],
763        bytes[4],
764        bytes[5],
765        bytes[6],
766        bytes[7],
767        bytes[8],
768        bytes[9],
769        bytes[10],
770        bytes[11],
771        bytes[12],
772        bytes[13],
773        bytes[14],
774        bytes[15]
775    )
776}
777
778pub fn file_digest(path: &Path) -> RhoResult<String> {
779    let mut file = File::open(path)?;
780    let mut hasher = Sha256::new();
781    let mut buffer = [0u8; 16 * 1024];
782    loop {
783        let read = file.read(&mut buffer)?;
784        if read == 0 {
785            break;
786        }
787        hasher.update(&buffer[..read]);
788    }
789    Ok(format!("{:x}", hasher.finalize()))
790}
791
/// Guesses a MIME type from the file extension of `path` (case-insensitively).
///
/// Paths without an extension, with a non-UTF-8 extension, or with an extension
/// that is not in the table map to `application/octet-stream`.
pub fn mime_type(path: &Path) -> String {
    const FALLBACK: &str = "application/octet-stream";
    const KNOWN: &[(&str, &str)] = &[
        ("csv", "text/csv"),
        ("json", "application/json"),
        ("yaml", "application/yaml"),
        ("yml", "application/yaml"),
        ("txt", "text/plain"),
        ("md", "text/plain"),
        ("py", "text/x-python"),
        ("rs", "text/rust"),
        ("pdf", "application/pdf"),
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("gif", "image/gif"),
        ("svg", "image/svg+xml"),
        ("zip", "application/zip"),
        ("tar", "application/x-tar"),
        ("gz", "application/gzip"),
        ("rhoenc", "application/vnd.rho.envelope"),
    ];

    let mime = path
        .extension()
        .and_then(OsStr::to_str)
        .map(str::to_ascii_lowercase)
        .and_then(|extension| {
            KNOWN
                .iter()
                .find(|(known, _)| *known == extension.as_str())
                .map(|(_, mime)| *mime)
        })
        .unwrap_or(FALLBACK);
    mime.to_string()
}
820
821fn format_unix_seconds_rfc3339(seconds: u64) -> String {
822    let days = (seconds / 86_400) as i64;
823    let seconds_of_day = seconds % 86_400;
824    let (year, month, day) = civil_from_days(days);
825    let hour = seconds_of_day / 3_600;
826    let minute = (seconds_of_day % 3_600) / 60;
827    let second = seconds_of_day % 60;
828    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
829}
830
/// Converts a day count relative to 1970-01-01 into `(year, month, day)`.
///
/// The count is shifted by 719 468 days so that it is measured from 0000-03-01,
/// then split into 400-year eras of 146 097 days. Years inside the computation
/// start in March, which puts the leap day at the end of the year; January and
/// February are therefore attributed to the following calendar year at the end.
fn civil_from_days(days_since_unix_epoch: i64) -> (i64, u64, u64) {
    const DAYS_PER_ERA: i64 = 146_097;
    let shifted = days_since_unix_epoch + 719_468;
    // Floor division/remainder keeps negative day counts in the right era.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };
    let year = year_of_era + era * 400 + i64::from(month <= 2);
    (year, month as u64, day as u64)
}
845
846pub fn file_name(path: &Path) -> RhoResult<String> {
847    path.file_name()
848        .and_then(OsStr::to_str)
849        .map(ToOwned::to_owned)
850        .ok_or_else(|| format!("path has no valid file name: {}", path.display()).into())
851}
852
/// Renders `path` for display in canonical form when possible.
///
/// If canonicalization fails (for example because the path does not exist), the
/// path is rendered exactly as given.
pub fn canonical_display(path: &Path) -> String {
    let resolved = match path.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => path.to_path_buf(),
    };
    resolved.display().to_string()
}
859
/// Reads the file at `path` into a string, treating a missing file as `None`.
///
/// # Errors
/// Any I/O error other than `NotFound` is returned unchanged.
pub fn read_to_string_if_exists(path: &Path) -> io::Result<Option<String>> {
    fs::read_to_string(path).map(Some).or_else(|error| {
        if error.kind() == io::ErrorKind::NotFound {
            Ok(None)
        } else {
            Err(error)
        }
    })
}
867
868pub fn split_command_line(value: &str) -> RhoResult<Vec<String>> {
869    let mut parts = Vec::new();
870    let mut current = String::new();
871    let mut chars = value.chars().peekable();
872    let mut quote: Option<char> = None;
873
874    while let Some(ch) = chars.next() {
875        match (quote, ch) {
876            (Some(active), next) if next == active => quote = None,
877            (Some(_), '\\') => {
878                if let Some(next) = chars.next() {
879                    current.push(next);
880                }
881            }
882            (Some(_), next) => current.push(next),
883            (None, '\'' | '"') => quote = Some(ch),
884            (None, next) if next.is_whitespace() => {
885                if !current.is_empty() {
886                    parts.push(std::mem::take(&mut current));
887                }
888            }
889            (None, next) => current.push(next),
890        }
891    }
892
893    if quote.is_some() {
894        return Err("unterminated quote in command_text".into());
895    }
896    if !current.is_empty() {
897        parts.push(current);
898    }
899    if parts.is_empty() {
900        return Err("command_text is empty".into());
901    }
902    Ok(parts)
903}
904
905#[cfg(test)]
906mod tests {
907    use super::*;
908
909    #[test]
910    fn split_command_line_preserves_quoted_args() {
911        let parts = split_command_line("python3 \"sum prices.py\" DATASET_CSV").unwrap();
912        assert_eq!(parts, vec!["python3", "sum prices.py", "DATASET_CSV"]);
913    }
914
915    #[test]
916    fn split_command_line_rejects_unterminated_quotes() {
917        assert!(split_command_line("python3 \"unterminated").is_err());
918    }
919
920    #[test]
921    fn validates_expected_ids() {
922        assert!(validate_actor_id("agent1").is_ok());
923        assert!(validate_request_id("req-abc-123").is_ok());
924        assert!(validate_run_id("run-real-123").is_ok());
925        assert!(validate_actor_id("test-runner").is_ok());
926        assert!(validate_actor_id("Agent1").is_err());
927        assert!(validate_request_id("abc-123").is_err());
928        assert!(validate_run_id("real-123").is_err());
929    }
930
931    #[test]
932    fn rejects_unsafe_relative_paths() {
933        assert!(validate_relative_safe_path("workspace/sum_prices.py").is_ok());
934        assert!(validate_relative_safe_path("../private/data.csv").is_err());
935        assert!(validate_relative_safe_path("/tmp/data.csv").is_err());
936        assert!(validate_relative_safe_path("").is_err());
937    }
938
939    #[test]
940    fn path_patterns_are_segment_aware_globs() {
941        assert!(path_matches_pattern(
942            "rho/messages/inbox/id/github/rho-owner/request.yaml",
943            "rho/messages/inbox/id/github/rho-owner/**"
944        ));
945        assert!(path_matches_pattern(
946            "rho/messages/inbox/id/github/rho-owner",
947            "rho/messages/inbox/id/github/rho-owner/**"
948        ));
949        assert!(path_matches_pattern(
950            "rho/messages/inbox/id/github/rho-owner/req-123/request.yaml",
951            "rho/messages/inbox/id/github/*/req-*/request.yaml"
952        ));
953        assert!(path_matches_pattern(
954            "./rho/messages/inbox/id/github/rho-owner/req-123/request.yaml",
955            "rho/messages/**/request.*"
956        ));
957        assert!(!path_matches_pattern(
958            "rho/messages/inbox/id/github/rho-owner/req-123/request.yaml",
959            "rho/messages/inbox/id/github/*/request.yaml"
960        ));
961        assert!(!path_matches_pattern(
962            "rho/messages/inbox/id/github/rho-owner/req-123/request.yaml",
963            "rho/messages/inbox/id/github/rho-owner/req-124/**"
964        ));
965        assert!(!path_matches_pattern(
966            "rho/messages/inbox/id/github/rho-owner/req-123/request.yaml",
967            "rho/messages/inbox/id/github/rho-*"
968        ));
969    }
970
971    #[test]
972    fn file_digest_uses_sha256() {
973        let path = std::env::temp_dir().join(format!("rho-file-digest-{}.txt", uuid_like()));
974        fs::write(&path, b"abc").unwrap();
975        let digest = file_digest(&path).unwrap();
976        fs::remove_file(&path).unwrap();
977
978        assert_eq!(
979            digest,
980            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
981        );
982        assert!(!digest.starts_with("fallback-noncryptographic-"));
983    }
984
985    #[test]
986    fn formats_unix_seconds_as_rfc3339_utc() {
987        assert_eq!(format_unix_seconds_rfc3339(0), "1970-01-01T00:00:00Z");
988        assert_eq!(
989            format_unix_seconds_rfc3339(946_684_800),
990            "2000-01-01T00:00:00Z"
991        );
992        assert_eq!(
993            format_unix_seconds_rfc3339(1_609_459_200),
994            "2021-01-01T00:00:00Z"
995        );
996    }
997
998    #[test]
999    fn uuid_like_generates_uuid_v4_shape() {
1000        let value = uuid_like();
1001        assert_eq!(value.len(), 36);
1002        assert_eq!(&value[8..9], "-");
1003        assert_eq!(&value[13..14], "-");
1004        assert_eq!(&value[18..19], "-");
1005        assert_eq!(&value[23..24], "-");
1006        assert_eq!(&value[14..15], "4");
1007        assert!(matches!(&value[19..20], "8" | "9" | "a" | "b"));
1008        assert!(value.chars().all(|ch| ch.is_ascii_hexdigit() || ch == '-'));
1009    }
1010
1011    #[test]
1012    fn mime_type_uses_deterministic_extension_mapping() {
1013        assert_eq!(mime_type(Path::new("prices.csv")), "text/csv");
1014        assert_eq!(mime_type(Path::new("manifest.yaml")), "application/yaml");
1015        assert_eq!(mime_type(Path::new("report.pdf")), "application/pdf");
1016        assert_eq!(
1017            mime_type(Path::new("unknown.nope")),
1018            "application/octet-stream"
1019        );
1020    }
1021
1022    #[test]
1023    fn encrypted_envelope_detection_requires_top_level_yaml_kind() {
1024        assert!(is_rho_encrypted_text(
1025            "version: 1\nkind: rho_recipient_envelope\n"
1026        ));
1027        assert!(is_rho_encrypted_text(
1028            "version: 1\nkind: rho_transparent_file\n"
1029        ));
1030        assert!(!is_rho_encrypted_text(
1031            "message:\n  kind: rho_recipient_envelope\n"
1032        ));
1033        assert!(!is_rho_encrypted_text(
1034            "not yaml: [\nkind: rho_recipient_envelope\n"
1035        ));
1036    }
1037
1038    #[test]
1039    fn request_manifest_round_trips_as_yaml() {
1040        let manifest = RequestManifest {
1041            version: 1,
1042            request: RunRequest {
1043                id: "req-abc-123".to_string(),
1044                from: "agent2".to_string(),
1045                to: "agent1".to_string(),
1046                tool_id: "run_real".to_string(),
1047                dataset_uuid: "11111111-2222-3333-4444-555555555555".to_string(),
1048                code_paths: vec!["sandbox/two-console/shared/workspace/sum_prices.py".to_string()],
1049                code_sha256: "abc123".to_string(),
1050                command: vec![
1051                    "python3".to_string(),
1052                    "sum_prices.py".to_string(),
1053                    "DATASET_CSV".to_string(),
1054                ],
1055                requested_tier: "real".to_string(),
1056                created_at: "2026-04-30T00:00:00Z".to_string(),
1057            },
1058        };
1059
1060        let yaml = to_yaml(&manifest).unwrap();
1061        assert!(yaml.contains("command:"));
1062        assert!(!yaml.contains("command_text"));
1063        let parsed: RequestManifest = from_yaml(&yaml).unwrap();
1064        assert_eq!(parsed, manifest);
1065    }
1066
1067    #[test]
1068    fn run_manifest_omits_absent_optional_fields() {
1069        let manifest = RunManifest {
1070            version: 1,
1071            run: RunRecord {
1072                id: "run-blocked-123".to_string(),
1073                request_id: "req-abc-123".to_string(),
1074                status: "blocked".to_string(),
1075                tier: "real".to_string(),
1076                runner: None,
1077                dataset_csv: None,
1078                code_path: Some("sandbox/two-console/shared/workspace/sum_prices.py".to_string()),
1079                code_sha256: None,
1080                command: vec!["python3".to_string()],
1081                exit_code: None,
1082                error: Some("requires approval".to_string()),
1083                stdout_path: "sandbox/two-console/shared/.rho/runs/run-blocked-123/stdout.txt"
1084                    .to_string(),
1085                stderr_path: "sandbox/two-console/shared/.rho/runs/run-blocked-123/stderr.txt"
1086                    .to_string(),
1087                created_at: "2026-04-30T00:00:00Z".to_string(),
1088            },
1089        };
1090
1091        let yaml = to_yaml(&manifest).unwrap();
1092        assert!(!yaml.contains("dataset_csv"));
1093        assert!(!yaml.contains("runner"));
1094        assert!(!yaml.contains("exit_code"));
1095        assert!(yaml.contains("requires approval"));
1096    }
1097
1098    #[test]
1099    fn sandbox_run_manifest_round_trips_as_yaml() {
1100        let manifest = SandboxRunManifest {
1101            version: 1,
1102            sandbox_run: SandboxRunRecord {
1103                id: "run-run-mock".to_string(),
1104                request_id: "req-mock-123".to_string(),
1105                runner: "local".to_string(),
1106                tier: "mock".to_string(),
1107                dataset_csv: "sandbox/two-console/shared/datasets/id/mock/prices.csv".to_string(),
1108                code_path: "sandbox/two-console/shared/workspace/sum_prices.py".to_string(),
1109                command: vec![
1110                    "python3".to_string(),
1111                    "sandbox/two-console/shared/workspace/sum_prices.py".to_string(),
1112                    "sandbox/two-console/shared/datasets/id/mock/prices.csv".to_string(),
1113                ],
1114                artifact_dir: "sandbox/two-console/shared/.rho/runs/run-run-mock".to_string(),
1115                stdout_path: "sandbox/two-console/shared/.rho/runs/run-run-mock/stdout.txt"
1116                    .to_string(),
1117                stderr_path: "sandbox/two-console/shared/.rho/runs/run-run-mock/stderr.txt"
1118                    .to_string(),
1119                mounts: vec![SandboxMount {
1120                    host_path: "sandbox/two-console/shared/workspace".to_string(),
1121                    guest_path: "/workspace".to_string(),
1122                    mode: "ro".to_string(),
1123                }],
1124                network: SandboxNetworkPolicy {
1125                    default_deny: true,
1126                    allow_hosts: vec![],
1127                    tcp_maps: vec![],
1128                },
1129                created_at: "2026-04-30T00:00:00Z".to_string(),
1130            },
1131        };
1132
1133        let yaml = to_yaml(&manifest).unwrap();
1134        assert!(yaml.contains("sandbox_run:"));
1135        assert!(yaml.contains("runner: local"));
1136        let parsed: SandboxRunManifest = from_yaml(&yaml).unwrap();
1137        assert_eq!(parsed, manifest);
1138    }
1139
1140    #[test]
1141    fn controlled_action_round_trips_as_json() {
1142        let manifest = ControlledActionManifest {
1143            version: 1,
1144            kind: "controlled_action".to_string(),
1145            action: ControlledAction {
1146                action_id: "act-run-real".to_string(),
1147                request_id: "req-abc-123".to_string(),
1148                tool_id: "run_real".to_string(),
1149                requested_by: "agent2".to_string(),
1150                requested_for: "agent1".to_string(),
1151                action_type: "run_real_data".to_string(),
1152                summary: "Run approved script".to_string(),
1153                reason: "Need aggregate".to_string(),
1154                input_path: None,
1155                script_path: Some("sandbox/two-console/shared/workspace/sum_prices.py".to_string()),
1156                output_path: "sandbox/two-console/shared/.rho/runs/run-act-run-real/stdout.txt"
1157                    .to_string(),
1158            },
1159        };
1160
1161        let json = to_json_pretty(&manifest).unwrap();
1162        assert!(json.contains("\"controlled_action\""));
1163        let parsed: ControlledActionManifest = from_json(&json).unwrap();
1164        assert_eq!(parsed, manifest);
1165    }
1166
1167    #[test]
1168    fn validates_action_ids_and_types() {
1169        assert!(validate_action_id("act-run-real").is_ok());
1170        assert!(validate_action_id("run-real").is_err());
1171        assert!(validate_tool_id("run_real").is_ok());
1172        assert!(validate_action_type("run_real_data").is_ok());
1173        assert!(validate_action_type("run_mock_data").is_ok());
1174        assert!(validate_action_type("delete_private_data").is_err());
1175    }
1176
1177    #[test]
1178    fn tool_manifest_round_trips_as_yaml() {
1179        let manifest = ToolManifest {
1180            version: 1,
1181            tool: Tool {
1182                id: "run_real".to_string(),
1183                action_type: "run_real_data".to_string(),
1184                owner: "agent1".to_string(),
1185                approval_required: true,
1186                command_template: vec![
1187                    "python3".to_string(),
1188                    "CODE_PATH".to_string(),
1189                    "DATASET_CSV".to_string(),
1190                ],
1191            },
1192        };
1193        let yaml = to_yaml(&manifest).unwrap();
1194        assert!(yaml.contains("run_real_data"));
1195        assert!(yaml.contains("command_template"));
1196        let parsed: ToolManifest = from_yaml(&yaml).unwrap();
1197        assert_eq!(parsed, manifest);
1198    }
1199
1200    #[test]
1201    fn action_grant_manifest_round_trips_as_yaml() {
1202        let manifest = ActionGrantManifest {
1203            version: 1,
1204            action_grant: ActionGrant {
1205                action_id: "act-run-real".to_string(),
1206                request_id: "req-abc-123".to_string(),
1207                tool_id: "run_real".to_string(),
1208                action_type: "run_real_data".to_string(),
1209                decision: "approved".to_string(),
1210                granted_by: "agent1".to_string(),
1211                created_at: "2026-04-30T00:00:00Z".to_string(),
1212                action: GrantedActionFile {
1213                    path: "sandbox/two-console/control/outbox/act-run-real.json".to_string(),
1214                    sha256: "action-sha".to_string(),
1215                },
1216                repo: GrantedRepoState {
1217                    git_commit: Some("commit-sha".to_string()),
1218                },
1219                inputs: vec![GrantedInput {
1220                    kind: "code".to_string(),
1221                    path: "sandbox/two-console/shared/workspace/sum_prices.py".to_string(),
1222                    sha256: "input-sha".to_string(),
1223                }],
1224            },
1225        };
1226
1227        let yaml = to_yaml(&manifest).unwrap();
1228        assert!(yaml.contains("action_grant:"));
1229        assert!(yaml.contains("action-sha"));
1230        assert!(yaml.contains("input-sha"));
1231        let parsed: ActionGrantManifest = from_yaml(&yaml).unwrap();
1232        assert_eq!(parsed, manifest);
1233    }
1234
1235    #[test]
1236    fn github_provider_parses_standard_and_alias_remotes() {
1237        use crate::providers::github::repo_candidate_from_remote_with_host_resolver;
1238
1239        assert_eq!(
1240            repo_candidate_from_remote_with_host_resolver(
1241                "git@github.com:madhavajay/rho-live.git",
1242                |_| false,
1243            ),
1244            Some("madhavajay/rho-live".to_string())
1245        );
1246        assert_eq!(
1247            repo_candidate_from_remote_with_host_resolver(
1248                "git@github-madhavajay:madhavajay/rho-live.git",
1249                |host| host == "github-madhavajay",
1250            ),
1251            Some("madhavajay/rho-live".to_string())
1252        );
1253        assert_eq!(
1254            repo_candidate_from_remote_with_host_resolver(
1255                "git@example.com:madhavajay/rho-live.git",
1256                |_| false,
1257            ),
1258            None
1259        );
1260    }
1261}