Skip to main content

coding_agent_search/sources/
probe.rs

1//! SSH host probing for remote source setup.
2//!
3//! This module provides functionality to probe SSH hosts and gather comprehensive
4//! information needed for remote source configuration decisions:
5//! - Whether cass is installed (and what version)
6//! - Index status (session count)
7//! - Detected agent session data directories
8//! - System information (OS, architecture)
9//! - Resource availability (disk space, memory)
10//!
11//! # Design
12//!
13//! Probing uses a single SSH session per host to minimize latency. A bash probe
14//! script is piped to `bash -s` on the remote, gathering all information in one
15//! round-trip.
16//!
17//! # Example
18//!
19//! ```rust,ignore
20//! use coding_agent_search::sources::probe::{probe_host, probe_hosts_parallel};
21//! use coding_agent_search::sources::config::DiscoveredHost;
22//!
23//! // Single host probe (returns HostProbeResult directly, not Result)
24//! let host = DiscoveredHost { name: "laptop".into(), .. };
25//! let result = probe_host(&host, 10);
26//! if result.reachable {
27//!     println!("Connected in {}ms", result.connection_time_ms);
28//! }
29//!
30//! // Parallel probing with progress (synchronous, uses rayon internally)
31//! let results = probe_hosts_parallel(&hosts, 10, |done, total, name| {
32//!     println!("Probing {}/{}: {}", done, total, name);
33//! });
34//! ```
35
36use std::collections::HashMap;
37use std::process::{Command, Stdio};
38use std::time::{Duration, Instant};
39
40use serde::{Deserialize, Serialize};
41
42use super::{
43    config::DiscoveredHost, host_key_verification_error, is_host_key_verification_failure,
44    strict_ssh_cli_tokens, wait_for_child_output_with_timeout,
45};
46
/// Default connection timeout in seconds.
///
/// The probing functions visible in this module take their timeout as an
/// explicit `timeout_secs` argument; this constant is the suggested value for
/// callers that have no preference of their own.
pub const DEFAULT_PROBE_TIMEOUT: u64 = 10;
49
/// Result of probing an SSH host.
///
/// Produced by `probe_host` (one per host). A failed probe is represented by
/// the same type with `reachable == false` and `error` populated, rather than
/// by a `Result`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostProbeResult {
    /// SSH config host alias.
    pub host_name: String,
    /// Whether the host was reachable via SSH.
    pub reachable: bool,
    /// Connection time in milliseconds.
    ///
    /// Measured around the whole `ssh` invocation (spawn through collected
    /// output); `0` for results built with [`HostProbeResult::unreachable`].
    pub connection_time_ms: u64,
    /// Status of cass installation on the remote.
    pub cass_status: CassStatus,
    /// Detected agent session directories.
    pub detected_agents: Vec<DetectedAgent>,
    /// System information.
    ///
    /// `None` when the probe output contained no `OS=` line.
    pub system_info: Option<SystemInfo>,
    /// Resource information (disk/memory).
    ///
    /// `None` when neither a disk nor a total-memory figure was reported.
    pub resources: Option<ResourceInfo>,
    /// Error message if probe failed.
    pub error: Option<String>,
}
70
71impl HostProbeResult {
72    /// Create a result for an unreachable host.
73    pub fn unreachable(host_name: &str, error: impl Into<String>) -> Self {
74        Self {
75            host_name: host_name.to_string(),
76            reachable: false,
77            connection_time_ms: 0,
78            cass_status: CassStatus::Unknown,
79            detected_agents: Vec::new(),
80            system_info: None,
81            resources: None,
82            error: Some(error.into()),
83        }
84    }
85
86    /// Check if cass is installed on this host.
87    pub fn has_cass(&self) -> bool {
88        self.cass_status.is_installed()
89    }
90
91    /// Check if this host has any agent session data.
92    pub fn has_agent_data(&self) -> bool {
93        !self.detected_agents.is_empty()
94    }
95
96    /// Get total estimated sessions across all detected agents.
97    pub fn total_sessions(&self) -> u64 {
98        self.detected_agents
99            .iter()
100            .filter_map(|a| a.estimated_sessions)
101            .sum()
102    }
103}
104
/// Status of cass installation on a remote host.
///
/// Serialized with serde's internal tagging: a `status` field holds the
/// snake_case variant name next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum CassStatus {
    /// cass is installed and has an indexed database.
    Indexed {
        /// Version string reported by the remote `cass --version`.
        version: String,
        /// Number of conversations reported by the remote `cass stats --json`
        /// (`0` when the figure could not be extracted).
        session_count: u64,
        /// Timestamp of the last index run; the probe parser in this module
        /// always sets this to `None`.
        last_indexed: Option<String>,
    },
    /// cass is installed but no index exists or is empty.
    InstalledNotIndexed { version: String },
    /// cass is not found on PATH.
    NotFound,
    /// Couldn't determine cass status.
    Unknown,
}
122
123impl CassStatus {
124    /// Check if cass is installed (any version).
125    pub fn is_installed(&self) -> bool {
126        matches!(
127            self,
128            CassStatus::Indexed { .. } | CassStatus::InstalledNotIndexed { .. }
129        )
130    }
131
132    /// Get the installed version if available.
133    pub fn version(&self) -> Option<&str> {
134        match self {
135            CassStatus::Indexed { version, .. } | CassStatus::InstalledNotIndexed { version } => {
136                Some(version)
137            }
138            _ => None,
139        }
140    }
141}
142
/// Detected agent session data on a remote host.
///
/// One value is built per `AGENT_DATA=` line emitted by the probe script.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedAgent {
    /// Type of agent (claude_code, codex, cursor, etc.).
    ///
    /// Derived from `path` by substring matching; `"unknown"` when no known
    /// pattern matches.
    pub agent_type: String,
    /// Path to the agent's session directory.
    ///
    /// This is the path as listed in the probe list (a leading `~` is not
    /// expanded), not the resolved remote path.
    pub path: String,
    /// Estimated number of sessions (from file count).
    ///
    /// The probe counts `*.jsonl` and `*.json` files up to 8 levels deep; a
    /// path that is a plain file counts as 1.
    pub estimated_sessions: Option<u64>,
    /// Estimated size in megabytes.
    ///
    /// Taken from `du -sm` on the remote; `0` when `du` produced no figure.
    pub estimated_size_mb: Option<u64>,
}
155
/// System information gathered from remote host.
///
/// Only constructed when the probe output contains an `OS=` line; fields whose
/// key is missing from the output fall back to an empty string, `false`, or
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Operating system (linux, darwin).
    ///
    /// Lower-cased output of `uname -s`.
    pub os: String,
    /// CPU architecture (x86_64, aarch64).
    ///
    /// Output of `uname -m`.
    pub arch: String,
    /// Linux distro name if available.
    ///
    /// `PRETTY_NAME` from `/etc/os-release` when that file exists.
    pub distro: Option<String>,
    /// Whether cargo is available.
    ///
    /// Also true when `~/.cargo/bin/cargo` is executable but not on `PATH`.
    pub has_cargo: bool,
    /// Whether cargo-binstall is available.
    ///
    /// Also true when `~/.cargo/bin/cargo-binstall` is executable but not on `PATH`.
    pub has_cargo_binstall: bool,
    /// Whether curl is available.
    pub has_curl: bool,
    /// Whether wget is available.
    pub has_wget: bool,
    /// Remote home directory.
    ///
    /// Value of `$HOME` in the remote shell.
    pub remote_home: String,
    /// Unique machine identifier (for deduplication of SSH aliases).
    /// On Linux: /etc/machine-id, on macOS: IOPlatformUUID.
    ///
    /// An empty identifier reported by the remote is stored as `None`.
    #[serde(default)]
    pub machine_id: Option<String>,
}
180
/// Resource information for installation feasibility.
///
/// All figures are reported by the remote in KB and converted to MB with
/// integer division by 1024.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceInfo {
    /// Available disk space in MB (in home directory).
    ///
    /// From the "available" column of `df -k ~`.
    pub disk_available_mb: u64,
    /// Total memory in MB.
    ///
    /// From `MemTotal` in `/proc/meminfo`, or `sysctl -n hw.memsize` when
    /// `/proc/meminfo` does not exist.
    pub memory_total_mb: u64,
    /// Available memory in MB.
    ///
    /// From `MemAvailable` in `/proc/meminfo`; on the `sysctl` path the probe
    /// reports the total here as well.
    pub memory_available_mb: u64,
    /// Heuristic: enough resources to compile Rust.
    ///
    /// True when `disk_available_mb >= MIN_DISK_MB` and
    /// `memory_total_mb >= MIN_MEMORY_MB`.
    pub can_compile: bool,
}
193
impl ResourceInfo {
    /// Minimum disk space (MB) recommended for cass installation.
    ///
    /// Used as the disk threshold of the `can_compile` heuristic.
    pub const MIN_DISK_MB: u64 = 1024; // 1 GB

    /// Minimum memory (MB) recommended for compilation.
    ///
    /// Compared against *total* memory by the `can_compile` heuristic.
    pub const MIN_MEMORY_MB: u64 = 2048; // 2 GB
}
201
/// Wrap `value` in single quotes for use as one POSIX shell word.
///
/// Each embedded `'` is emitted as `'\''` (close quote, escaped quote, reopen
/// quote), so no character of `value` is interpreted by the shell.
fn shell_single_quote_arg(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('\'');
    for ch in value.chars() {
        if ch == '\'' {
            quoted.push_str(r"'\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
205
/// Flatten per-agent probe paths into one sorted list without duplicates.
///
/// The agent slug of each entry is ignored; only the paths are kept.
fn collect_probe_dirs(probe_paths: Vec<(&'static str, Vec<String>)>) -> Vec<String> {
    let mut unique_dirs: Vec<String> = probe_paths
        .into_iter()
        .flat_map(|(_agent, paths)| paths)
        .collect();
    // `dedup` only removes adjacent repeats, hence the sort first.
    unique_dirs.sort();
    unique_dirs.dedup();
    unique_dirs
}
217
218fn probe_dir_array_entries(dir_list: &[String]) -> String {
219    dir_list
220        .iter()
221        .map(|path| format!("    {}", shell_single_quote_arg(path)))
222        .collect::<Vec<_>>()
223        .join("\n")
224}
225
226/// Build the bash probe script that gathers all information in one SSH call.
227///
228/// Agent detection paths are sourced dynamically from `franken_agent_detection`
229/// so that new connectors are automatically included in SSH probes.
230///
231/// Output format is key=value pairs, with special markers for sections.
232fn build_probe_script() -> String {
233    let dir_list = collect_probe_dirs(franken_agent_detection::default_probe_paths_tilde());
234    build_probe_script_for_dirs(&dir_list)
235}
236
/// Render the bash probe script for an explicit list of directories.
///
/// `dir_list` entries are single-quoted and placed into the script's
/// `PROBE_DIRS` array; a leading `~` / `~/` in an entry is expanded on the
/// remote against `$HOME` by a `case` statement (never via `eval`).
///
/// The script prints, between `===PROBE_START===` and `===PROBE_END===`:
/// - `OS`, `ARCH`, `HOME`, and (when available) `DISTRO`, `MACHINE_ID`
/// - `CASS_VERSION` (or `NOT_FOUND`), then `CASS_HEALTH` and `CASS_SESSIONS`
/// - `HAS_CARGO`, `HAS_BINSTALL`, `HAS_CURL`, `HAS_WGET` as `0`/`1`
/// - `DISK_AVAIL_KB`, `MEM_TOTAL_KB`, `MEM_AVAIL_KB`
/// - one `AGENT_DATA=<dir>|<size MB>|<file count>` line per existing entry,
///   where `<dir>` is the entry as listed (tilde form)
fn build_probe_script_for_dirs(dir_list: &[String]) -> String {
    let dirs_str = probe_dir_array_entries(dir_list);

    // The script is a `format!` template: the only substitution is `{dirs}`,
    // so every literal bash brace below is doubled (`{{` / `}}`).
    format!(
        r#"#!/bin/bash
echo "===PROBE_START==="

# System info
echo "OS=$(uname -s | tr '[:upper:]' '[:lower:]')"
echo "ARCH=$(uname -m)"
echo "HOME=$HOME"

# Distro detection (Linux only)
if [ -f /etc/os-release ]; then
    . /etc/os-release
    echo "DISTRO=$PRETTY_NAME"
fi

# Machine ID for deduplication of SSH aliases pointing to same host
# Linux: /etc/machine-id, macOS: IOPlatformUUID
if [ -f /etc/machine-id ]; then
    MACHINE_ID=$(cat /etc/machine-id 2>/dev/null | tr -d '\n')
    echo "MACHINE_ID=$MACHINE_ID"
elif command -v ioreg &> /dev/null; then
    MACHINE_ID=$(ioreg -rd1 -c IOPlatformExpertDevice 2>/dev/null | awk -F'"' '/IOPlatformUUID/{{print $4}}')
    echo "MACHINE_ID=$MACHINE_ID"
fi

# Cass status - check PATH and common install locations
# Non-interactive SSH doesn't source .bashrc, so user bin dirs may not be in PATH
CASS_BIN=""
if command -v cass &> /dev/null; then
    CASS_BIN="cass"
elif [ -x "$HOME/.cargo/bin/cass" ]; then
    CASS_BIN="$HOME/.cargo/bin/cass"
elif [ -x "$HOME/.local/bin/cass" ]; then
    CASS_BIN="$HOME/.local/bin/cass"
elif [ -x "/usr/local/bin/cass" ]; then
    CASS_BIN="/usr/local/bin/cass"
fi

if [ -n "$CASS_BIN" ]; then
    CASS_VER=$("$CASS_BIN" --version 2>/dev/null | head -1 | awk '{{print $2}}')
    if [ -z "$CASS_VER" ]; then
        # Binary exists but version command failed - treat as not found
        echo "CASS_VERSION=NOT_FOUND"
    else
        echo "CASS_VERSION=$CASS_VER"

        # Get health status (JSON output) - only if version was detected
        if "$CASS_BIN" health --json &>/dev/null; then
            echo "CASS_HEALTH=OK"
            # Try to get session count from stats
            STATS=$("$CASS_BIN" stats --json 2>/dev/null)
            if [ $? -eq 0 ] && [ -n "$STATS" ]; then
                # Extract total conversations from JSON (allow whitespace/newlines)
                SESSIONS=$(echo "$STATS" | tr -d '\n' | sed -n 's/.*"conversations"[[:space:]]*:[[:space:]]*\([0-9][0-9]*\).*/\1/p')
                echo "CASS_SESSIONS=${{SESSIONS:-0}}"
            else
                echo "CASS_SESSIONS=0"
            fi
        else
            echo "CASS_HEALTH=NOT_INDEXED"
        fi
    fi
else
    echo "CASS_VERSION=NOT_FOUND"
fi

# Tool availability - also check ~/.cargo/bin for non-interactive SSH sessions
if command -v cargo &> /dev/null || [ -x "$HOME/.cargo/bin/cargo" ]; then
    echo "HAS_CARGO=1"
else
    echo "HAS_CARGO=0"
fi
if command -v cargo-binstall &> /dev/null || [ -x "$HOME/.cargo/bin/cargo-binstall" ]; then
    echo "HAS_BINSTALL=1"
else
    echo "HAS_BINSTALL=0"
fi
command -v curl &> /dev/null && echo "HAS_CURL=1" || echo "HAS_CURL=0"
command -v wget &> /dev/null && echo "HAS_WGET=1" || echo "HAS_WGET=0"

# Resource info - disk (in KB, converted later)
DISK_KB=$(df -k ~ 2>/dev/null | awk 'NR==2 {{print $4}}')
echo "DISK_AVAIL_KB=${{DISK_KB:-0}}"

# Memory info (Linux)
if [ -f /proc/meminfo ]; then
    MEM_TOTAL=$(grep MemTotal /proc/meminfo 2>/dev/null | awk '{{print $2}}')
    MEM_AVAIL=$(grep MemAvailable /proc/meminfo 2>/dev/null | awk '{{print $2}}')
    echo "MEM_TOTAL_KB=${{MEM_TOTAL:-0}}"
    echo "MEM_AVAIL_KB=${{MEM_AVAIL:-0}}"
else
    # macOS - use sysctl
    if command -v sysctl &> /dev/null; then
        MEM_BYTES=$(sysctl -n hw.memsize 2>/dev/null)
        MEM_KB=$((MEM_BYTES / 1024))
        echo "MEM_TOTAL_KB=${{MEM_KB:-0}}"
        echo "MEM_AVAIL_KB=${{MEM_KB:-0}}"  # macOS doesn't have easy available mem
    fi
fi

# Agent data detection (with sizes and file counts)
PROBE_DIRS=(
{dirs}
)
for dir in "${{PROBE_DIRS[@]}}"; do
    # Expand only the leading tilde marker from our static probe list. Do not
    # eval paths: connector-owned paths can contain shell metacharacters.
    case "$dir" in
        "~") expanded_dir="$HOME" ;;
        "~/"*) expanded_dir="$HOME/${{dir#\~/}}" ;;
        *) expanded_dir="$dir" ;;
    esac
    if [ -e "$expanded_dir" ]; then
        SIZE=$(du -sm "$expanded_dir" 2>/dev/null | cut -f1)
        # Count JSONL files for session estimate
        if [ -d "$expanded_dir" ]; then
            # Keep probe bounded for very large trees: depth-limit and timeout when available.
            if command -v timeout &> /dev/null; then
                COUNT=$(timeout 5s find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            elif command -v gtimeout &> /dev/null; then
                COUNT=$(gtimeout 5s find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            else
                COUNT=$(find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            fi
        else
            COUNT=1  # Single file
        fi
        echo "AGENT_DATA=$dir|${{SIZE:-0}}|${{COUNT:-0}}"
    fi
done

echo "===PROBE_END==="
"#,
        dirs = dirs_str
    )
}
376
377/// Probe a single SSH host.
378///
379/// Runs a comprehensive probe script via SSH to gather system info, cass status,
380/// and detected agent data. Uses a single SSH session for efficiency.
381///
382/// # Arguments
383/// * `host` - The discovered SSH host to probe
384/// * `timeout_secs` - Connection timeout in seconds
385///
386/// # Returns
387/// A `HostProbeResult` with all gathered information, or error details if probe failed.
388pub fn probe_host(host: &DiscoveredHost, timeout_secs: u64) -> HostProbeResult {
389    let start = Instant::now();
390    let timeout_secs = timeout_secs.max(1);
391    let command_timeout = Duration::from_secs(timeout_secs);
392
393    // Build SSH command with strict host key verification.
394    // Security-first: do not auto-trust unknown hosts during probing.
395    // Use the host alias directly (SSH config handles Port, User, IdentityFile, ProxyJump, etc.)
396    let mut cmd = Command::new("ssh");
397    cmd.args(strict_ssh_cli_tokens(timeout_secs))
398        .arg("--")
399        .arg(&host.name)
400        .arg("bash -s")
401        .stdin(Stdio::piped())
402        .stdout(Stdio::piped())
403        .stderr(Stdio::piped());
404
405    // Spawn the process and write probe script to stdin
406    let mut child = match cmd.spawn() {
407        Ok(c) => c,
408        Err(e) => {
409            return HostProbeResult::unreachable(
410                &host.name,
411                format!("Failed to execute ssh: {}", e),
412            );
413        }
414    };
415
416    // Write probe script to stdin
417    let probe_script = build_probe_script();
418    let write_error = if let Some(mut stdin) = child.stdin.take() {
419        use std::io::Write;
420        stdin.write_all(probe_script.as_bytes()).err()
421    } else {
422        None
423    };
424
425    // Wait for completion
426    let output = match wait_for_child_output_with_timeout(child, command_timeout) {
427        Ok(Some(o)) => o,
428        Ok(None) => {
429            return HostProbeResult::unreachable(
430                &host.name,
431                format!("Connection timed out after {timeout_secs} seconds"),
432            );
433        }
434        Err(e) => {
435            return HostProbeResult::unreachable(&host.name, format!("SSH command failed: {}", e));
436        }
437    };
438
439    let connection_time_ms = start.elapsed().as_millis() as u64;
440
441    // Check for SSH failures
442    if !output.status.success() {
443        let stderr = String::from_utf8_lossy(&output.stderr);
444        let error_msg = if stderr.contains("Connection refused") {
445            "Connection refused".to_string()
446        } else if stderr.contains("Connection timed out") || stderr.contains("timed out") {
447            "Connection timed out".to_string()
448        } else if stderr.contains("Permission denied") {
449            "Permission denied (key not loaded in ssh-agent?)".to_string()
450        } else if is_host_key_verification_failure(&stderr) {
451            host_key_verification_error(&host.name)
452        } else if stderr.contains("No route to host") {
453            "No route to host".to_string()
454        } else {
455            format!("SSH failed: {}", stderr.trim())
456        };
457
458        return HostProbeResult::unreachable(&host.name, error_msg);
459    }
460    if let Some(e) = write_error {
461        return HostProbeResult::unreachable(
462            &host.name,
463            format!("Failed to write probe script: {}", e),
464        );
465    }
466
467    // Parse successful output
468    let stdout = String::from_utf8_lossy(&output.stdout);
469    parse_probe_output(&host.name, &stdout, connection_time_ms)
470}
471
472/// Parse the probe script output into a HostProbeResult.
473fn parse_probe_output(host_name: &str, output: &str, connection_time_ms: u64) -> HostProbeResult {
474    let mut values: HashMap<String, String> = HashMap::new();
475    let mut agent_data: Vec<(String, u64, u64)> = Vec::new(); // (path, size_mb, count)
476
477    // Parse only key=value pairs emitted by the probe script itself. SSH login
478    // banners, forced-command wrappers, or shell noise can appear before or
479    // after the markers and must not override the measured values.
480    let mut inside_probe = false;
481    let mut saw_start = false;
482    let mut saw_end = false;
483    for line in output.lines() {
484        let line = line.trim();
485        if line == "===PROBE_START===" {
486            if saw_start {
487                return HostProbeResult::unreachable(host_name, "Probe script output malformed");
488            }
489            saw_start = true;
490            inside_probe = true;
491            continue;
492        }
493        if line == "===PROBE_END===" {
494            if !inside_probe {
495                return HostProbeResult::unreachable(host_name, "Probe script output malformed");
496            }
497            saw_end = true;
498            break;
499        }
500        if !inside_probe || line.is_empty() || line.starts_with("===") {
501            continue;
502        }
503
504        if line.starts_with("AGENT_DATA=") {
505            // Special handling for agent data: AGENT_DATA=path|size|count
506            if let Some(data) = line.strip_prefix("AGENT_DATA=") {
507                // Use rsplitn to handle paths containing pipes (parse from right)
508                // Yields: count, size, path
509                let parts: Vec<&str> = data.rsplitn(3, '|').collect();
510                if parts.len() == 3 {
511                    let count = parts[0].parse().unwrap_or(0);
512                    let size = parts[1].parse().unwrap_or(0);
513                    let path = parts[2].to_string();
514                    agent_data.push((path, size, count));
515                }
516            }
517        } else if let Some((key, value)) = line.split_once('=') {
518            values.insert(key.to_string(), value.to_string());
519        }
520    }
521
522    if !saw_start || !saw_end {
523        return HostProbeResult::unreachable(host_name, "Probe script output malformed");
524    }
525
526    // Build CassStatus
527    let cass_status = if let Some(version) = values.get("CASS_VERSION") {
528        if version == "NOT_FOUND" {
529            CassStatus::NotFound
530        } else {
531            let health = values.get("CASS_HEALTH").map(|s| s.as_str());
532            if health == Some("OK") {
533                let sessions = values
534                    .get("CASS_SESSIONS")
535                    .and_then(|s| s.parse().ok())
536                    .unwrap_or(0);
537                CassStatus::Indexed {
538                    version: version.clone(),
539                    session_count: sessions,
540                    last_indexed: None,
541                }
542            } else {
543                CassStatus::InstalledNotIndexed {
544                    version: version.clone(),
545                }
546            }
547        }
548    } else {
549        CassStatus::Unknown
550    };
551
552    // Build SystemInfo
553    let system_info = values.get("OS").map(|os| SystemInfo {
554        os: os.clone(),
555        arch: values.get("ARCH").cloned().unwrap_or_default(),
556        distro: values.get("DISTRO").cloned(),
557        has_cargo: values.get("HAS_CARGO").map(|v| v == "1").unwrap_or(false),
558        has_cargo_binstall: values
559            .get("HAS_BINSTALL")
560            .map(|v| v == "1")
561            .unwrap_or(false),
562        has_curl: values.get("HAS_CURL").map(|v| v == "1").unwrap_or(false),
563        has_wget: values.get("HAS_WGET").map(|v| v == "1").unwrap_or(false),
564        remote_home: values.get("HOME").cloned().unwrap_or_default(),
565        machine_id: values.get("MACHINE_ID").cloned().filter(|s| !s.is_empty()),
566    });
567
568    // Build ResourceInfo
569    let resources = {
570        let disk_kb = values
571            .get("DISK_AVAIL_KB")
572            .and_then(|s| s.parse::<u64>().ok())
573            .unwrap_or(0);
574        let mem_total_kb = values
575            .get("MEM_TOTAL_KB")
576            .and_then(|s| s.parse::<u64>().ok())
577            .unwrap_or(0);
578        let mem_avail_kb = values
579            .get("MEM_AVAIL_KB")
580            .and_then(|s| s.parse::<u64>().ok())
581            .unwrap_or(0);
582
583        if disk_kb > 0 || mem_total_kb > 0 {
584            let disk_mb = disk_kb / 1024;
585            let mem_total_mb = mem_total_kb / 1024;
586            let mem_avail_mb = mem_avail_kb / 1024;
587
588            Some(ResourceInfo {
589                disk_available_mb: disk_mb,
590                memory_total_mb: mem_total_mb,
591                memory_available_mb: mem_avail_mb,
592                can_compile: disk_mb >= ResourceInfo::MIN_DISK_MB
593                    && mem_total_mb >= ResourceInfo::MIN_MEMORY_MB,
594            })
595        } else {
596            None
597        }
598    };
599
600    // Build DetectedAgents
601    let detected_agents: Vec<DetectedAgent> = agent_data
602        .into_iter()
603        .map(|(path, size_mb, count)| {
604            let agent_type = infer_agent_type(&path);
605            DetectedAgent {
606                agent_type,
607                path,
608                estimated_sessions: Some(count),
609                estimated_size_mb: Some(size_mb),
610            }
611        })
612        .collect();
613
614    HostProbeResult {
615        host_name: host_name.to_string(),
616        reachable: true,
617        connection_time_ms,
618        cass_status,
619        detected_agents,
620        system_info,
621        resources,
622        error: None,
623    }
624}
625
/// Infer agent type from path.
///
/// The path is checked against an ordered rule table and the first rule that
/// matches decides the result; `"unknown"` is returned when none does.
///
/// Note: order matters. More specific patterns must come first (e.g.
/// `saoudrizwan.claude-dev` contains `.claude`, so Cline precedes Claude Code).
fn infer_agent_type(path: &str) -> String {
    // (agent slug, substrings matched anywhere, suffixes matched at the end)
    const RULES: &[(&str, &[&str], &[&str])] = &[
        (
            "cline",
            &["saoudrizwan.claude-dev", "rooveterinaryinc.roo-cline"],
            &[],
        ),
        ("claude_code", &[".claude"], &[]),
        ("codex", &[".codex"], &[]),
        ("cursor", &[".cursor", "Cursor"], &[]),
        ("gemini", &[".gemini"], &[]),
        ("pi_agent", &["/.pi/"], &["/.pi"]),
        ("aider", &[".aider"], &[]),
        ("opencode", &["opencode"], &[]),
        ("goose", &[".goose"], &[]),
        ("copilot", &["copilot-chat", "gh-copilot", "gh/copilot"], &[]),
        ("continue", &[".continue"], &[]),
        ("amp", &["sourcegraph.amp", "/amp/"], &["/amp"]),
        ("clawdbot", &[".clawdbot"], &[]),
        ("factory", &[".factory"], &[]),
        ("vibe", &[".vibe"], &[]),
        ("windsurf", &[".windsurf"], &[]),
    ];

    for &(slug, needles, suffixes) in RULES {
        let contains_hit = needles.iter().any(|needle| path.contains(*needle));
        let suffix_hit = suffixes.iter().any(|suffix| path.ends_with(*suffix));
        if contains_hit || suffix_hit {
            return slug.to_string();
        }
    }
    String::from("unknown")
}
671
672/// Probe multiple hosts in parallel.
673///
674/// Uses rayon's parallel iterator to probe hosts concurrently, calling the
675/// progress callback as each probe completes.
676///
677/// # Arguments
678/// * `hosts` - Slice of discovered hosts to probe
679/// * `timeout_secs` - Connection timeout per host
680/// * `on_progress` - Callback called after each host completes: (completed, total, host_name)
681///
682/// # Returns
683/// Vector of probe results for all hosts.
684pub fn probe_hosts_parallel<F>(
685    hosts: &[DiscoveredHost],
686    timeout_secs: u64,
687    on_progress: F,
688) -> Vec<HostProbeResult>
689where
690    F: Fn(usize, usize, &str) + Send + Sync,
691{
692    use rayon::prelude::*;
693    use std::sync::Arc;
694    use std::sync::atomic::{AtomicUsize, Ordering};
695
696    let total = hosts.len();
697    let completed = Arc::new(AtomicUsize::new(0));
698    let on_progress = Arc::new(on_progress);
699
700    // Use rayon for true parallel execution
701    hosts
702        .par_iter()
703        .map(|host| {
704            let result = probe_host(host, timeout_secs);
705
706            let done = completed.fetch_add(1, Ordering::SeqCst) + 1;
707            on_progress(done, total, &host.name);
708
709            result
710        })
711        .collect()
712}
713
/// Cache for probe results to avoid repeated probing.
///
/// Entries are keyed by host name and stamped with the time they were
/// inserted; an entry is considered valid while its age in whole seconds is
/// below the TTL.
///
/// Note: Use `ProbeCache::new(ttl_secs)` to create a cache. The `Default`
/// implementation uses a 5-minute TTL.
#[derive(Debug)]
pub struct ProbeCache {
    // host name -> (cached result, insertion time)
    results: HashMap<String, (HostProbeResult, std::time::Instant)>,
    // Lifetime of an entry in seconds.
    ttl_secs: u64,
}
723
impl Default for ProbeCache {
    /// Create an empty cache whose entries stay valid for 300 seconds.
    fn default() -> Self {
        Self::new(300) // 5-minute default TTL
    }
}
729
730impl ProbeCache {
731    /// Create a new cache with the specified TTL in seconds.
732    pub fn new(ttl_secs: u64) -> Self {
733        Self {
734            results: HashMap::new(),
735            ttl_secs,
736        }
737    }
738
739    /// Get a cached result if still valid.
740    pub fn get(&self, host_name: &str) -> Option<&HostProbeResult> {
741        self.results.get(host_name).and_then(|(result, ts)| {
742            if ts.elapsed().as_secs() < self.ttl_secs {
743                Some(result)
744            } else {
745                None
746            }
747        })
748    }
749
750    /// Insert a result into the cache.
751    pub fn insert(&mut self, result: HostProbeResult) {
752        self.results.insert(
753            result.host_name.clone(),
754            (result, std::time::Instant::now()),
755        );
756    }
757
758    /// Clear expired entries.
759    pub fn clear_expired(&mut self) {
760        self.results
761            .retain(|_, (_, ts)| ts.elapsed().as_secs() < self.ttl_secs);
762    }
763}
764
765/// Deduplicate probe results that point to the same physical machine.
766///
767/// Multiple SSH aliases may point to the same machine. This function identifies
768/// duplicates using the machine_id from the probe and keeps only one entry per
769/// physical machine.
770///
771/// # Selection criteria (when duplicates found)
772/// 1. Prefer hosts with cass already installed
773/// 2. Prefer hosts with more sessions indexed
774/// 3. Otherwise, keep the first one alphabetically
775///
776/// # Returns
777/// A tuple of (deduplicated results, merged aliases map).
778/// The merged map contains: kept_host_name -> vec![merged_alias_names]
779pub fn deduplicate_probe_results(
780    results: Vec<HostProbeResult>,
781) -> (Vec<HostProbeResult>, HashMap<String, Vec<String>>) {
782    // Group by machine_id (skip hosts without machine_id - can't dedupe them)
783    let mut by_machine_id: HashMap<String, Vec<HostProbeResult>> = HashMap::new();
784    let mut no_machine_id: Vec<HostProbeResult> = Vec::new();
785
786    for result in results {
787        if let Some(ref machine_id) = result
788            .system_info
789            .as_ref()
790            .and_then(|s| s.machine_id.clone())
791        {
792            by_machine_id
793                .entry(machine_id.clone())
794                .or_default()
795                .push(result);
796        } else {
797            no_machine_id.push(result);
798        }
799    }
800
801    let mut deduplicated: Vec<HostProbeResult> = Vec::new();
802    let mut merged_aliases: HashMap<String, Vec<String>> = HashMap::new();
803
804    // Process groups with machine_id
805    for (_machine_id, mut group) in by_machine_id {
806        if group.len() == 1 {
807            deduplicated.push(group.remove(0));
808        } else {
809            // Multiple aliases for same machine - pick the best one
810            group.sort_by(|a, b| {
811                // 1. Prefer installed cass
812                let a_installed = a.cass_status.is_installed();
813                let b_installed = b.cass_status.is_installed();
814                if a_installed != b_installed {
815                    return b_installed.cmp(&a_installed);
816                }
817
818                // 2. Prefer more sessions
819                let a_sessions = match &a.cass_status {
820                    CassStatus::Indexed { session_count, .. } => *session_count,
821                    _ => 0,
822                };
823                let b_sessions = match &b.cass_status {
824                    CassStatus::Indexed { session_count, .. } => *session_count,
825                    _ => 0,
826                };
827                if a_sessions != b_sessions {
828                    return b_sessions.cmp(&a_sessions);
829                }
830
831                // 3. Alphabetically by name
832                a.host_name.cmp(&b.host_name)
833            });
834
835            // Keep the first (best) one, record others as merged
836            let kept = group.remove(0);
837            let merged: Vec<String> = group.into_iter().map(|h| h.host_name).collect();
838
839            if !merged.is_empty() {
840                merged_aliases.insert(kept.host_name.clone(), merged);
841            }
842            deduplicated.push(kept);
843        }
844    }
845
846    // Add back hosts without machine_id
847    deduplicated.extend(no_machine_id);
848
849    // Sort final list by name for consistent ordering
850    deduplicated.sort_by(|a, b| a.host_name.cmp(&b.host_name));
851
852    (deduplicated, merged_aliases)
853}
854
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // CassStatus accessors
    // =========================================================================

    /// Both "installed" variants report `is_installed()`; the others do not.
    #[test]
    fn test_cass_status_is_installed() {
        let indexed = CassStatus::Indexed {
            version: "0.1.50".into(),
            session_count: 100,
            last_indexed: None,
        };
        let not_indexed = CassStatus::InstalledNotIndexed {
            version: "0.1.50".into(),
        };

        assert!(indexed.is_installed());
        assert!(not_indexed.is_installed());

        assert!(!CassStatus::NotFound.is_installed());
        assert!(!CassStatus::Unknown.is_installed());
    }

    /// `version()` exposes the version string of installed variants only.
    #[test]
    fn test_cass_status_version() {
        let indexed = CassStatus::Indexed {
            version: "0.1.50".into(),
            session_count: 0,
            last_indexed: None,
        };
        assert_eq!(indexed.version(), Some("0.1.50"));

        let not_indexed = CassStatus::InstalledNotIndexed {
            version: "0.1.49".into(),
        };
        assert_eq!(not_indexed.version(), Some("0.1.49"));

        assert_eq!(CassStatus::NotFound.version(), None);
    }

    // =========================================================================
    // Agent type inference and probe output parsing
    // =========================================================================

    /// Well-known agent directories map to their agent identifiers; anything
    /// else maps to "unknown".
    #[test]
    fn test_infer_agent_type() {
        let expectations = [
            ("~/.claude/projects", "claude_code"),
            ("~/.codex/sessions", "codex"),
            ("~/.cursor", "cursor"),
            ("~/.gemini/tmp", "gemini"),
            (
                "~/.config/Code/User/globalStorage/saoudrizwan.claude-dev",
                "cline",
            ),
            (
                "~/.config/Code/User/globalStorage/github.copilot-chat",
                "copilot",
            ),
            ("~/.config/gh-copilot", "copilot"),
            ("/some/random/path", "unknown"),
        ];

        for (path, agent) in expectations {
            assert_eq!(infer_agent_type(path), agent);
        }
    }

    /// A complete, well-formed probe transcript populates every section of the
    /// result.
    #[test]
    fn test_parse_probe_output_success() {
        let raw = r#"
===PROBE_START===
OS=linux
ARCH=x86_64
HOME=/home/user
DISTRO=Ubuntu 22.04
CASS_VERSION=0.1.50
CASS_HEALTH=OK
CASS_SESSIONS=1234
HAS_CARGO=1
HAS_BINSTALL=0
HAS_CURL=1
HAS_WGET=1
DISK_AVAIL_KB=52428800
MEM_TOTAL_KB=16777216
MEM_AVAIL_KB=8388608
AGENT_DATA=~/.claude/projects|150|42
AGENT_DATA=~/.codex/sessions|50|10
===PROBE_END===
"#;

        let probe = parse_probe_output("test-host", raw, 100);

        // Host name and timing echo the arguments given to the parser.
        assert!(probe.reachable);
        assert_eq!(probe.host_name, "test-host");
        assert_eq!(probe.connection_time_ms, 100);

        // cass status: must be the indexed variant with the reported values.
        let CassStatus::Indexed {
            version,
            session_count,
            ..
        } = &probe.cass_status
        else {
            panic!("expected Indexed status");
        };
        assert_eq!(version, "0.1.50");
        assert_eq!(*session_count, 1234);

        // System information.
        let info = probe.system_info.as_ref().unwrap();
        assert_eq!(info.os, "linux");
        assert_eq!(info.arch, "x86_64");
        assert_eq!(info.distro, Some("Ubuntu 22.04".into()));
        assert!(info.has_cargo);
        assert!(!info.has_cargo_binstall);
        assert!(info.has_curl);

        // Resources are reported in MB (KB value / 1024).
        let resources = probe.resources.as_ref().unwrap();
        assert_eq!(resources.disk_available_mb, 51200); // 52428800 / 1024
        assert_eq!(resources.memory_total_mb, 16384); // 16777216 / 1024
        assert!(resources.can_compile);

        // Detected agents keep the order of the AGENT_DATA lines.
        let agents = &probe.detected_agents;
        assert_eq!(agents.len(), 2);
        assert_eq!(agents[0].agent_type, "claude_code");
        assert_eq!(agents[0].estimated_sessions, Some(42));
        assert_eq!(agents[1].agent_type, "codex");
    }

    /// Key/value lines before PROBE_START or after PROBE_END must not leak into
    /// the parsed result.
    #[test]
    fn test_parse_probe_output_ignores_noise_outside_markers() {
        let raw = r#"
CASS_VERSION=NOT_FOUND
AGENT_DATA=/tmp/outside-before|999|999
===PROBE_START===
OS=linux
ARCH=x86_64
HOME=/home/user
CASS_VERSION=0.4.2
CASS_HEALTH=OK
CASS_SESSIONS=7
HAS_CARGO=1
HAS_BINSTALL=0
HAS_CURL=1
HAS_WGET=1
DISK_AVAIL_KB=2048000
MEM_TOTAL_KB=4096000
MEM_AVAIL_KB=1024000
===PROBE_END===
CASS_VERSION=NOT_FOUND
AGENT_DATA=/tmp/outside-after|999|999
"#;

        let probe = parse_probe_output("noisy-host", raw, 42);

        assert!(probe.reachable);
        assert!(probe.detected_agents.is_empty());
        assert!(matches!(
            &probe.cass_status,
            CassStatus::Indexed {
                version,
                session_count: 7,
                ..
            } if version == "0.4.2"
        ));
    }

    /// `CASS_VERSION=NOT_FOUND` yields `CassStatus::NotFound` while the rest of
    /// the system info is still parsed.
    #[test]
    fn test_parse_probe_output_cass_not_found() {
        let raw = r#"
===PROBE_START===
OS=darwin
ARCH=arm64
HOME=/Users/user
CASS_VERSION=NOT_FOUND
HAS_CARGO=0
HAS_BINSTALL=0
HAS_CURL=1
HAS_WGET=0
DISK_AVAIL_KB=10240000
MEM_TOTAL_KB=8388608
MEM_AVAIL_KB=4194304
===PROBE_END===
"#;

        let probe = parse_probe_output("mac-host", raw, 50);

        assert!(probe.reachable);
        assert!(matches!(probe.cass_status, CassStatus::NotFound));

        let info = probe.system_info.as_ref().unwrap();
        assert_eq!(info.os, "darwin");
        assert_eq!(info.arch, "arm64");
        assert!(!info.has_cargo);
    }

    /// Output without any probe markers is reported as unreachable with an
    /// error attached.
    #[test]
    fn test_parse_probe_output_malformed() {
        let probe = parse_probe_output("bad-host", "random garbage", 0);

        assert!(!probe.reachable);
        assert!(probe.error.is_some());
    }

    /// An END marker that precedes the START marker is rejected.
    #[test]
    fn test_parse_probe_output_rejects_out_of_order_markers() {
        let raw = r#"
===PROBE_END===
===PROBE_START===
OS=linux
CASS_VERSION=0.4.2
"#;
        let probe = parse_probe_output("bad-host", raw, 0);

        assert!(!probe.reachable);
        assert!(probe.error.is_some());
    }

    /// The `unreachable` constructor records the error and reports no cass and
    /// no agent data.
    #[test]
    fn test_host_probe_result_unreachable() {
        let failed = HostProbeResult::unreachable("test", "Connection refused");

        assert!(!failed.reachable);
        assert_eq!(failed.error, Some("Connection refused".into()));
        assert!(!failed.has_cass());
        assert!(!failed.has_agent_data());
    }

    /// An inserted result can be looked up by host name; unknown names miss.
    #[test]
    fn test_probe_cache() {
        let entry = HostProbeResult {
            host_name: "test".into(),
            reachable: true,
            connection_time_ms: 100,
            cass_status: CassStatus::NotFound,
            detected_agents: vec![],
            system_info: None,
            resources: None,
            error: None,
        };

        let mut cache = ProbeCache::new(300); // 5 minute TTL
        cache.insert(entry);

        assert!(cache.get("test").is_some());
        assert!(cache.get("nonexistent").is_none());
    }

    /// The `can_compile` flag is carried as-is on `ResourceInfo` values.
    #[test]
    fn test_resource_info_can_compile() {
        let good = ResourceInfo {
            disk_available_mb: 2000,
            memory_total_mb: 4000,
            memory_available_mb: 2000,
            can_compile: true,
        };
        assert!(good.can_compile);

        // Same memory figures, but with little disk and the flag cleared.
        let low_disk = ResourceInfo {
            disk_available_mb: 500,
            can_compile: false,
            ..good
        };
        assert!(!low_disk.can_compile);
    }

    // =========================================================================
    // Real system probe tests — run PROBE_SCRIPT locally without SSH
    // =========================================================================

    /// Pipe `script` into a local `bash -s` and return its stdout.
    ///
    /// When `home` is given it is exported as `HOME` for the child process.
    /// Otherwise `HOME` is only injected if it is currently unset and a home
    /// directory can be resolved. Panics if bash cannot be spawned, the script
    /// cannot be written, or the script exits unsuccessfully.
    fn run_probe_script_with_home(script: &str, home: Option<&std::path::Path>) -> String {
        use std::io::Write;

        let mut bash = Command::new("bash");
        bash.arg("-s")
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped());

        match home {
            Some(dir) => {
                bash.env("HOME", dir);
            }
            None if dotenvy::var("HOME").is_err() => {
                // Ensure HOME is set for the probe script (may not be set in some test
                // environments).
                if let Some(base) = directories::BaseDirs::new() {
                    bash.env("HOME", base.home_dir());
                }
            }
            None => {}
        }

        let mut child = bash.spawn().expect("bash should be available");
        if let Some(mut pipe) = child.stdin.take() {
            pipe.write_all(script.as_bytes())
                .expect("write probe script");
            // `pipe` is dropped here, closing the child's stdin.
        }

        let finished = child
            .wait_with_output()
            .expect("probe script should finish");
        assert!(
            finished.status.success(),
            "probe script failed: {:?}",
            String::from_utf8_lossy(&finished.stderr)
        );
        String::from_utf8_lossy(&finished.stdout).into_owned()
    }

    /// Run the default probe script (from `build_probe_script`) locally via
    /// bash, returning stdout.
    fn run_probe_script_locally() -> String {
        let script = build_probe_script();
        run_probe_script_with_home(&script, None)
    }

    /// Single-quoting leaves shell metacharacters inert and escapes embedded
    /// single quotes.
    #[test]
    fn shell_single_quote_arg_quotes_shell_metacharacters() {
        let cases = [
            ("plain/path", "'plain/path'"),
            ("can't", "'can'\\''t'"),
            (
                "$(touch /tmp/nope); `whoami`",
                "'$(touch /tmp/nope); `whoami`'",
            ),
        ];

        for (input, quoted) in cases {
            assert_eq!(shell_single_quote_arg(input), quoted);
        }
    }

    /// The generated script iterates a literal bash array and never routes
    /// paths through `eval`.
    #[test]
    fn probe_script_uses_literal_array_without_eval() {
        let script = build_probe_script();

        let required_fragments = [
            "PROBE_DIRS=(",
            "for dir in \"${PROBE_DIRS[@]}\"",
            "expanded_dir=\"$HOME/${dir#\\~/}\"",
        ];
        for fragment in required_fragments {
            assert!(script.contains(fragment));
        }

        assert!(
            !script.contains("eval echo"),
            "probe paths must not be expanded through eval"
        );
    }

    /// A probe path containing spaces, quotes and a command substitution is
    /// reported verbatim and the substitution is never executed.
    #[test]
    fn probe_script_treats_special_probe_paths_as_literals() {
        let fake_home = tempfile::tempdir().expect("temp home");
        let relative_path =
            "Library/Application Support/Codex$(touch \"$HOME/SHOULD_NOT_EXIST\");can't";
        let special_dir = fake_home.path().join(relative_path);
        std::fs::create_dir_all(special_dir).expect("create special path");

        let probe_path = format!("~/{relative_path}");
        let script = build_probe_script_for_dirs(std::slice::from_ref(&probe_path));
        let output = run_probe_script_with_home(&script, Some(fake_home.path()));

        assert!(
            output.contains(&format!("AGENT_DATA={probe_path}|")),
            "special probe path should be reported literally: {output}"
        );

        // If the substitution had run, it would have created this marker file.
        let marker = fake_home.path().join("SHOULD_NOT_EXIST");
        assert!(
            !marker.exists(),
            "probe path interpolation must not execute command substitutions"
        );

        let parsed = parse_probe_output("localhost", &output, 0);
        assert!(
            parsed
                .detected_agents
                .iter()
                .any(|agent| agent.path == probe_path),
            "parsed agent data should preserve literal path"
        );
    }

    /// Running the real script emits both framing markers.
    #[test]
    fn real_probe_script_produces_valid_markers() {
        let stdout = run_probe_script_locally();

        assert!(
            stdout.contains("===PROBE_START==="),
            "missing PROBE_START marker"
        );
        assert!(
            stdout.contains("===PROBE_END==="),
            "missing PROBE_END marker"
        );
    }

    /// The real script's output parses into a reachable result with system and
    /// resource sections.
    #[test]
    fn real_probe_script_parses_into_reachable_result() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);

        assert!(
            probe.reachable,
            "local probe should be reachable: {:?}",
            probe.error
        );
        assert!(probe.system_info.is_some(), "should have system info");
        assert!(probe.resources.is_some(), "should have resource info");
    }

    /// The reported OS is one of the two values this test accepts.
    #[test]
    fn real_probe_system_info_has_valid_os() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let info = probe.system_info.as_ref().expect("system_info");

        assert!(
            matches!(info.os.as_str(), "linux" | "darwin"),
            "OS should be linux or darwin, got: {}",
            info.os
        );
    }

    /// The reported architecture is one of a fixed list of known values.
    #[test]
    fn real_probe_system_info_has_valid_arch() {
        const KNOWN_ARCHS: [&str; 7] = [
            "x86_64", "aarch64", "arm64", "armv7l", "i686", "s390x", "ppc64le",
        ];

        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let info = probe.system_info.as_ref().expect("system_info");

        assert!(
            KNOWN_ARCHS.iter().any(|known| *known == info.arch.as_str()),
            "arch should be a known value, got: {}",
            info.arch
        );
    }

    /// The reported home directory is a non-empty absolute path.
    #[test]
    fn real_probe_system_info_has_nonempty_home() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let info = probe.system_info.as_ref().expect("system_info");

        assert!(!info.remote_home.is_empty(), "home should not be empty");
        assert!(
            info.remote_home.starts_with('/'),
            "home should be absolute: {}",
            info.remote_home
        );
    }

    /// Available disk space is reported as a positive number of MB.
    #[test]
    fn real_probe_resources_have_nonzero_disk() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let resources = probe.resources.as_ref().expect("resources");

        assert!(
            resources.disk_available_mb > 0,
            "disk_available_mb should be > 0"
        );
    }

    /// Total and available memory are both reported as positive.
    #[test]
    fn real_probe_resources_have_nonzero_memory() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let resources = probe.resources.as_ref().expect("resources");

        assert!(
            resources.memory_total_mb > 0,
            "memory_total_mb should be > 0"
        );
        assert!(
            resources.memory_available_mb > 0,
            "memory_available_mb should be > 0"
        );
    }

    /// Available memory never exceeds total memory.
    #[test]
    fn real_probe_resources_memory_invariant() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let resources = probe.resources.as_ref().expect("resources");

        assert!(
            resources.memory_available_mb <= resources.memory_total_mb,
            "available memory ({}) should not exceed total ({})",
            resources.memory_available_mb,
            resources.memory_total_mb
        );
    }

    /// `can_compile` agrees with the disk and memory minimums declared on
    /// `ResourceInfo`.
    #[test]
    fn real_probe_resources_can_compile_reflects_thresholds() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let resources = probe.resources.as_ref().expect("resources");

        let enough_disk = resources.disk_available_mb >= ResourceInfo::MIN_DISK_MB;
        let enough_memory = resources.memory_total_mb >= ResourceInfo::MIN_MEMORY_MB;

        assert_eq!(
            resources.can_compile,
            enough_disk && enough_memory,
            "can_compile should match threshold check: disk={}MB mem={}MB",
            resources.disk_available_mb,
            resources.memory_total_mb
        );
    }

    /// Tool flags are mutually consistent on the local machine.
    #[test]
    fn real_probe_tool_detection_is_consistent() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let info = probe.system_info.as_ref().expect("system_info");

        // If cargo-binstall is available, cargo must also be available
        assert!(
            !info.has_cargo_binstall || info.has_cargo,
            "binstall requires cargo"
        );
        // At least one download tool should exist on any modern system
        assert!(
            info.has_curl || info.has_wget,
            "system should have at least curl or wget"
        );
    }

    /// The generated script mentions every expected agent path and keeps its
    /// overall structure (markers plus the directory loop).
    #[test]
    fn probe_script_contains_all_franken_agent_detection_paths() {
        let script = build_probe_script();

        // Verify key agent paths from franken_agent_detection are present
        let agent_paths = [
            ("~/.claude", "missing claude paths"),
            ("~/.codex/sessions", "missing codex path"),
            ("~/.gemini", "missing gemini paths"),
            ("~/.goose/sessions", "missing goose path"),
            ("~/.continue/sessions", "missing continue path"),
            ("~/.aider", "missing aider path"),
            ("saoudrizwan.claude-dev", "missing cline path"),
            ("copilot-chat", "missing copilot path"),
            ("~/.windsurf", "missing windsurf path"),
            ("~/.factory", "missing factory path"),
            ("~/.clawdbot", "missing clawdbot path"),
            ("~/.vibe", "missing vibe path"),
            ("sourcegraph.amp", "missing amp path"),
        ];
        for (needle, complaint) in agent_paths {
            assert!(script.contains(needle), "{complaint}");
        }

        // Verify script structure
        let structure = [
            "===PROBE_START===",
            "===PROBE_END===",
            "for dir in \"${PROBE_DIRS[@]}\"",
        ];
        for fragment in structure {
            assert!(script.contains(fragment));
        }
    }

    /// Ensure infer_agent_type handles all agents from franken_agent_detection.
    #[test]
    fn infer_agent_type_covers_all_dynamic_agents() {
        let expectations = [
            ("~/.goose/sessions", "goose"),
            ("~/.continue/sessions", "continue"),
            ("~/.clawdbot/sessions", "clawdbot"),
            ("~/.factory/sessions", "factory"),
            ("~/.vibe/logs/session", "vibe"),
            ("~/.windsurf", "windsurf"),
            (
                "~/.config/Code/User/globalStorage/sourcegraph.amp",
                "amp",
            ),
            ("~/.pi/agent/sessions", "pi_agent"),
        ];

        for (path, agent) in expectations {
            assert_eq!(infer_agent_type(path), agent);
        }
    }

    // =========================================================================
    // Deduplication tests
    // =========================================================================

    /// Build a reachable probe result for deduplication tests.
    ///
    /// `sessions` selects the cass status: `Some(n)` gives an indexed install
    /// with `n` sessions, `None` gives `CassStatus::NotFound`. `machine_id` is
    /// stored on the attached system info.
    fn make_probe_result(
        name: &str,
        machine_id: Option<&str>,
        sessions: Option<u64>,
    ) -> HostProbeResult {
        let cass_status = match sessions {
            Some(count) => CassStatus::Indexed {
                version: "0.1.50".into(),
                session_count: count,
                last_indexed: None,
            },
            None => CassStatus::NotFound,
        };

        let system_info = SystemInfo {
            os: "linux".into(),
            arch: "x86_64".into(),
            distro: Some("Ubuntu 25.10".into()),
            has_cargo: true,
            has_cargo_binstall: false,
            has_curl: true,
            has_wget: true,
            remote_home: "/home/ubuntu".into(),
            machine_id: machine_id.map(str::to_owned),
        };

        let resources = ResourceInfo {
            disk_available_mb: 800_000,
            memory_total_mb: 16_000,
            memory_available_mb: 8_000,
            can_compile: true,
        };

        HostProbeResult {
            host_name: name.to_owned(),
            reachable: true,
            connection_time_ms: 100,
            cass_status,
            detected_agents: vec![],
            system_info: Some(system_info),
            resources: Some(resources),
            error: None,
        }
    }

    /// Distinct machine ids are left untouched.
    #[test]
    fn test_deduplicate_no_duplicates() {
        let probes = vec![
            make_probe_result("host1", Some("machine-1"), Some(100)),
            make_probe_result("host2", Some("machine-2"), Some(200)),
        ];

        let (kept, aliases) = deduplicate_probe_results(probes);

        assert_eq!(kept.len(), 2);
        assert!(aliases.is_empty());
    }

    /// Two SSH aliases for the same machine collapse into one entry.
    #[test]
    fn test_deduplicate_same_machine() {
        let probes = vec![
            make_probe_result("jain", Some("abc123"), None),
            make_probe_result("jain_ovh_box", Some("abc123"), None),
        ];

        let (kept, aliases) = deduplicate_probe_results(probes);

        assert_eq!(kept.len(), 1);
        // Should keep "jain" (alphabetically first since neither has cass)
        assert_eq!(kept[0].host_name, "jain");

        let absorbed = aliases.get("jain").unwrap();
        assert_eq!(absorbed, &vec!["jain_ovh_box".to_string()]);
    }

    /// Of two aliases, the one with cass installed wins.
    #[test]
    fn test_deduplicate_prefers_installed_cass() {
        let without_cass = make_probe_result("alias_a", Some("machine-x"), None);
        let with_cass = make_probe_result("alias_b", Some("machine-x"), Some(500));

        let (kept, aliases) = deduplicate_probe_results(vec![without_cass, with_cass]);

        assert_eq!(kept.len(), 1);
        // Should keep alias_b because it has cass installed
        assert_eq!(kept[0].host_name, "alias_b");
        assert!(aliases.contains_key("alias_b"));
    }

    /// When both aliases have cass, the one with more sessions wins.
    #[test]
    fn test_deduplicate_prefers_more_sessions() {
        let fewer = make_probe_result("host_low", Some("machine-y"), Some(50));
        let more = make_probe_result("host_high", Some("machine-y"), Some(500));

        let (kept, aliases) = deduplicate_probe_results(vec![fewer, more]);

        assert_eq!(kept.len(), 1);
        // Should keep host_high because it has more sessions
        assert_eq!(kept[0].host_name, "host_high");
        // Verify the merge recorded the merged alias
        assert!(aliases.contains_key("host_high"));
    }

    /// Hosts without machine_id should not be merged.
    #[test]
    fn test_deduplicate_no_machine_id_not_merged() {
        let probes = vec![
            make_probe_result("host1", None, Some(100)),
            make_probe_result("host2", None, Some(200)),
        ];

        let (kept, aliases) = deduplicate_probe_results(probes);

        assert_eq!(kept.len(), 2);
        assert!(aliases.is_empty());
    }

    /// Hosts with a shared machine id merge while a host without one is kept
    /// alongside them.
    #[test]
    fn test_deduplicate_mixed_with_and_without_machine_id() {
        let probes = vec![
            make_probe_result("aliasA", Some("same-machine"), Some(100)),
            make_probe_result("aliasB", Some("same-machine"), Some(50)),
            make_probe_result("standalone", None, Some(75)),
        ];

        let (kept, aliases) = deduplicate_probe_results(probes);

        // 2 hosts: one from deduplication, one standalone
        assert_eq!(kept.len(), 2);

        let names: Vec<&str> = kept.iter().map(|h| h.host_name.as_str()).collect();
        // aliasA should be kept (more sessions)
        assert!(names.contains(&"aliasA"));
        assert!(names.contains(&"standalone"));
        assert_eq!(aliases.len(), 1);
    }

    /// Three aliases of one machine collapse to the one with the most sessions,
    /// with the other two recorded as merged.
    #[test]
    fn test_deduplicate_three_aliases_same_machine() {
        let probes = vec![
            make_probe_result("alias1", Some("same"), Some(100)),
            make_probe_result("alias2", Some("same"), Some(200)),
            make_probe_result("alias3", Some("same"), Some(150)),
        ];

        let (kept, aliases) = deduplicate_probe_results(probes);

        assert_eq!(kept.len(), 1);
        // alias2 has the most sessions
        assert_eq!(kept[0].host_name, "alias2");

        // The merged list should contain the other two aliases
        let absorbed = aliases.get("alias2").unwrap();
        assert_eq!(absorbed.len(), 2);
        assert!(absorbed.iter().any(|alias| alias == "alias1"));
        assert!(absorbed.iter().any(|alias| alias == "alias3"));
    }

    /// Test that the local probe script actually collects machine_id.
    #[test]
    fn real_probe_machine_id_present() {
        let stdout = run_probe_script_locally();
        let probe = parse_probe_output("localhost", &stdout, 0);
        let info = probe.system_info.as_ref().expect("system_info");

        // Only Linux and macOS are checked; any other reported OS passes
        // without assertions.
        if !matches!(info.os.as_str(), "linux" | "darwin") {
            return;
        }

        assert!(
            info.machine_id.is_some(),
            "machine_id should be present on {}",
            info.os
        );
        let mid = info.machine_id.as_ref().unwrap();
        assert!(!mid.is_empty(), "machine_id should not be empty");
        // Machine IDs are typically 32+ hex chars or UUID format
        assert!(
            mid.len() >= 32,
            "machine_id should be at least 32 chars, got: {}",
            mid
        );
    }
}