Skip to main content

coding_agent_search/sources/
probe.rs

1//! SSH host probing for remote source setup.
2//!
3//! This module provides functionality to probe SSH hosts and gather comprehensive
4//! information needed for remote source configuration decisions:
5//! - Whether cass is installed (and what version)
6//! - Index status (session count)
7//! - Detected agent session data directories
8//! - System information (OS, architecture)
9//! - Resource availability (disk space, memory)
10//!
11//! # Design
12//!
13//! Probing uses a single SSH session per host to minimize latency. A bash probe
14//! script is piped to `bash -s` on the remote, gathering all information in one
15//! round-trip.
16//!
17//! # Example
18//!
19//! ```rust,ignore
20//! use coding_agent_search::sources::probe::{probe_host, probe_hosts_parallel};
21//! use coding_agent_search::sources::config::DiscoveredHost;
22//!
23//! // Single host probe (returns HostProbeResult directly, not Result)
24//! let host = DiscoveredHost { name: "laptop".into(), .. };
25//! let result = probe_host(&host, 10);
26//! if result.reachable {
27//!     println!("Connected in {}ms", result.connection_time_ms);
28//! }
29//!
30//! // Parallel probing with progress (synchronous, uses rayon internally)
31//! let results = probe_hosts_parallel(&hosts, 10, |done, total, name| {
32//!     println!("Probing {}/{}: {}", done, total, name);
33//! });
34//! ```
35
36use std::collections::HashMap;
37use std::process::{Command, Stdio};
38use std::time::{Duration, Instant};
39
40use serde::{Deserialize, Serialize};
41
42use super::{
43    config::DiscoveredHost, configure_child_process_group, host_key_verification_error,
44    is_host_key_verification_failure, strict_ssh_cli_tokens, wait_for_child_output_with_timeout,
45};
46
/// Default SSH connection timeout for host probes, in seconds.
pub const DEFAULT_PROBE_TIMEOUT: u64 = 10;
49
/// Result of probing an SSH host.
///
/// Probing never returns a `Result::Err`: a failed probe is represented by a
/// value with `reachable == false` and `error` set (see
/// [`HostProbeResult::unreachable`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostProbeResult {
    /// SSH config host alias.
    pub host_name: String,
    /// Whether the host was reachable via SSH.
    pub reachable: bool,
    /// Connection time in milliseconds.
    ///
    /// Measured from just before the `ssh` process is spawned until its
    /// output has been collected, so it includes the probe script's run time.
    /// Always `0` for unreachable results.
    pub connection_time_ms: u64,
    /// Status of cass installation on the remote.
    pub cass_status: CassStatus,
    /// Detected agent session directories.
    pub detected_agents: Vec<DetectedAgent>,
    /// System information; `None` when the probe did not report an OS.
    pub system_info: Option<SystemInfo>,
    /// Resource information (disk/memory); `None` when neither disk nor
    /// total memory could be measured.
    pub resources: Option<ResourceInfo>,
    /// Error message if probe failed.
    pub error: Option<String>,
}
70
71impl HostProbeResult {
72    /// Create a result for an unreachable host.
73    pub fn unreachable(host_name: &str, error: impl Into<String>) -> Self {
74        Self {
75            host_name: host_name.to_string(),
76            reachable: false,
77            connection_time_ms: 0,
78            cass_status: CassStatus::Unknown,
79            detected_agents: Vec::new(),
80            system_info: None,
81            resources: None,
82            error: Some(error.into()),
83        }
84    }
85
86    /// Check if cass is installed on this host.
87    pub fn has_cass(&self) -> bool {
88        self.cass_status.is_installed()
89    }
90
91    /// Check if this host has any agent session data.
92    pub fn has_agent_data(&self) -> bool {
93        !self.detected_agents.is_empty()
94    }
95
96    /// Get total estimated sessions across all detected agents.
97    pub fn total_sessions(&self) -> u64 {
98        self.detected_agents
99            .iter()
100            .filter_map(|a| a.estimated_sessions)
101            .sum()
102    }
103}
104
/// Status of cass installation on a remote host.
///
/// Serialized as an internally tagged enum: the variant name is stored in a
/// `status` field using snake_case (see the `serde` attribute below).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum CassStatus {
    /// cass is installed and has an indexed database.
    Indexed {
        /// Version string reported by `cass --version`.
        version: String,
        /// Number of indexed conversations reported by `cass stats --json`
        /// (0 when it could not be extracted).
        session_count: u64,
        /// Timestamp of the last indexing run; the probe parser in this
        /// module always leaves it as `None`.
        last_indexed: Option<String>,
    },
    /// cass is installed but no index exists or is empty.
    InstalledNotIndexed { version: String },
    /// cass is not found on PATH.
    NotFound,
    /// Couldn't determine cass status.
    Unknown,
}
122
123impl CassStatus {
124    /// Check if cass is installed (any version).
125    pub fn is_installed(&self) -> bool {
126        matches!(
127            self,
128            CassStatus::Indexed { .. } | CassStatus::InstalledNotIndexed { .. }
129        )
130    }
131
132    /// Get the installed version if available.
133    pub fn version(&self) -> Option<&str> {
134        match self {
135            CassStatus::Indexed { version, .. } | CassStatus::InstalledNotIndexed { version } => {
136                Some(version)
137            }
138            _ => None,
139        }
140    }
141}
142
/// Detected agent session data on a remote host.
///
/// One value is produced per `AGENT_DATA=` line emitted by the probe script.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedAgent {
    /// Type of agent (claude_code, codex, cursor, etc.), inferred from the
    /// path; `unknown` when no known pattern matches.
    pub agent_type: String,
    /// Path to the agent's session directory, as listed in the probe's
    /// directory list (the script echoes the unexpanded entry, e.g. `~/...`).
    pub path: String,
    /// Estimated number of sessions (from file count): `*.jsonl`/`*.json`
    /// files found up to depth 8, or 1 when the path is a single file.
    pub estimated_sessions: Option<u64>,
    /// Estimated size in megabytes (from `du -sm`).
    pub estimated_size_mb: Option<u64>,
}
155
/// System information gathered from remote host.
///
/// Built from the `KEY=value` lines printed by the probe script; the struct
/// is only produced when the script reported an `OS` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Operating system (linux, darwin): lowercased `uname -s` output.
    pub os: String,
    /// CPU architecture (x86_64, aarch64) from `uname -m`; empty if the
    /// probe did not report it.
    pub arch: String,
    /// Linux distro name if available (`PRETTY_NAME` from `/etc/os-release`).
    pub distro: Option<String>,
    /// Whether cargo is available (on `PATH` or at `~/.cargo/bin/cargo`).
    pub has_cargo: bool,
    /// Whether cargo-binstall is available (on `PATH` or in `~/.cargo/bin`).
    pub has_cargo_binstall: bool,
    /// Whether curl is available.
    pub has_curl: bool,
    /// Whether wget is available.
    pub has_wget: bool,
    /// Remote home directory (`$HOME` on the remote); empty if not reported.
    pub remote_home: String,
    /// Unique machine identifier (for deduplication of SSH aliases).
    /// On Linux: /etc/machine-id, on macOS: IOPlatformUUID.
    /// An empty identifier from the probe is stored as `None`.
    #[serde(default)]
    pub machine_id: Option<String>,
}
180
/// Resource information for installation feasibility.
///
/// The probe reports values in kilobytes; they are stored here in whole
/// megabytes (integer division by 1024).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceInfo {
    /// Available disk space in MB (in home directory).
    pub disk_available_mb: u64,
    /// Total memory in MB.
    pub memory_total_mb: u64,
    /// Available memory in MB. On hosts without `/proc/meminfo` the probe
    /// falls back to `sysctl` and reports the total here as well.
    pub memory_available_mb: u64,
    /// Heuristic: enough resources to compile Rust. True when available disk
    /// is at least `MIN_DISK_MB` and total memory is at least `MIN_MEMORY_MB`.
    pub can_compile: bool,
}
193
impl ResourceInfo {
    /// Minimum disk space (MB) recommended for cass installation.
    /// Compared against `disk_available_mb` when `can_compile` is computed.
    pub const MIN_DISK_MB: u64 = 1024; // 1 GB

    /// Minimum memory (MB) recommended for compilation.
    /// Compared against `memory_total_mb` when `can_compile` is computed.
    pub const MIN_MEMORY_MB: u64 = 2048; // 2 GB
}
201
/// Wrap `value` in single quotes so a POSIX shell reads it as one literal word.
///
/// Nothing can be escaped inside single quotes, so each embedded `'` is
/// emitted as `'\''`: close the quote, add an escaped quote, reopen the quote.
fn shell_single_quote_arg(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('\'');
    for ch in value.chars() {
        if ch == '\'' {
            quoted.push_str(r"'\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
205
/// Flatten per-connector probe paths into one sorted, duplicate-free list.
///
/// The connector slug in each tuple is ignored; only the paths are kept.
fn collect_probe_dirs(probe_paths: Vec<(&'static str, Vec<String>)>) -> Vec<String> {
    let mut unique_dirs: Vec<String> = probe_paths
        .into_iter()
        .flat_map(|(_slug, paths)| paths)
        .collect();
    // Sorting first makes equal paths adjacent, which `dedup` requires.
    unique_dirs.sort();
    unique_dirs.dedup();
    unique_dirs
}
217
218fn probe_dir_array_entries(dir_list: &[String]) -> String {
219    dir_list
220        .iter()
221        .map(|path| format!("    {}", shell_single_quote_arg(path)))
222        .collect::<Vec<_>>()
223        .join("\n")
224}
225
226/// Build the bash probe script that gathers all information in one SSH call.
227///
228/// Agent detection paths are sourced dynamically from `franken_agent_detection`
229/// so that new connectors are automatically included in SSH probes.
230///
231/// Output format is key=value pairs, with special markers for sections.
232fn build_probe_script() -> String {
233    let dir_list = collect_probe_dirs(franken_agent_detection::default_probe_paths_tilde());
234    build_probe_script_for_dirs(&dir_list)
235}
236
/// Render the bash probe script for an explicit list of directories.
///
/// `dir_list` entries are single-quoted and spliced into the script's
/// `PROBE_DIRS` array; everything else is a fixed template. Because the
/// template goes through `format!`, literal braces in the bash/awk code are
/// written doubled (`{{` / `}}`).
///
/// The script prints `KEY=value` lines between the `===PROBE_START===` and
/// `===PROBE_END===` markers, plus one `AGENT_DATA=path|size_mb|count` line
/// for every listed directory that exists on the remote.
fn build_probe_script_for_dirs(dir_list: &[String]) -> String {
    // Pre-render the array entries so the template needs a single placeholder.
    let dirs_str = probe_dir_array_entries(dir_list);

    format!(
        r#"#!/bin/bash
echo "===PROBE_START==="

# System info
echo "OS=$(uname -s | tr '[:upper:]' '[:lower:]')"
echo "ARCH=$(uname -m)"
echo "HOME=$HOME"

# Distro detection (Linux only)
if [ -f /etc/os-release ]; then
    . /etc/os-release
    echo "DISTRO=$PRETTY_NAME"
fi

# Machine ID for deduplication of SSH aliases pointing to same host
# Linux: /etc/machine-id, macOS: IOPlatformUUID
if [ -f /etc/machine-id ]; then
    MACHINE_ID=$(cat /etc/machine-id 2>/dev/null | tr -d '\n')
    echo "MACHINE_ID=$MACHINE_ID"
elif command -v ioreg &> /dev/null; then
    MACHINE_ID=$(ioreg -rd1 -c IOPlatformExpertDevice 2>/dev/null | awk -F'"' '/IOPlatformUUID/{{print $4}}')
    echo "MACHINE_ID=$MACHINE_ID"
fi

# Cass status - check PATH and common install locations
# Non-interactive SSH doesn't source .bashrc, so user bin dirs may not be in PATH
CASS_BIN=""
if command -v cass &> /dev/null; then
    CASS_BIN="cass"
elif [ -x "$HOME/.cargo/bin/cass" ]; then
    CASS_BIN="$HOME/.cargo/bin/cass"
elif [ -x "$HOME/.local/bin/cass" ]; then
    CASS_BIN="$HOME/.local/bin/cass"
elif [ -x "/usr/local/bin/cass" ]; then
    CASS_BIN="/usr/local/bin/cass"
fi

if [ -n "$CASS_BIN" ]; then
    CASS_VER=$("$CASS_BIN" --version 2>/dev/null | head -1 | awk '{{print $2}}')
    if [ -z "$CASS_VER" ]; then
        # Binary exists but version command failed - treat as not found
        echo "CASS_VERSION=NOT_FOUND"
    else
        echo "CASS_VERSION=$CASS_VER"

        # Get health status (JSON output) - only if version was detected
        if "$CASS_BIN" health --json &>/dev/null; then
            echo "CASS_HEALTH=OK"
            # Try to get session count from stats
            STATS=$("$CASS_BIN" stats --json 2>/dev/null)
            if [ $? -eq 0 ] && [ -n "$STATS" ]; then
                # Extract total conversations from JSON (allow whitespace/newlines)
                SESSIONS=$(echo "$STATS" | tr -d '\n' | sed -n 's/.*"conversations"[[:space:]]*:[[:space:]]*\([0-9][0-9]*\).*/\1/p')
                echo "CASS_SESSIONS=${{SESSIONS:-0}}"
            else
                echo "CASS_SESSIONS=0"
            fi
        else
            echo "CASS_HEALTH=NOT_INDEXED"
        fi
    fi
else
    echo "CASS_VERSION=NOT_FOUND"
fi

# Tool availability - also check ~/.cargo/bin for non-interactive SSH sessions
if command -v cargo &> /dev/null || [ -x "$HOME/.cargo/bin/cargo" ]; then
    echo "HAS_CARGO=1"
else
    echo "HAS_CARGO=0"
fi
if command -v cargo-binstall &> /dev/null || [ -x "$HOME/.cargo/bin/cargo-binstall" ]; then
    echo "HAS_BINSTALL=1"
else
    echo "HAS_BINSTALL=0"
fi
command -v curl &> /dev/null && echo "HAS_CURL=1" || echo "HAS_CURL=0"
command -v wget &> /dev/null && echo "HAS_WGET=1" || echo "HAS_WGET=0"

# Resource info - disk (in KB, converted later)
DISK_KB=$(df -k ~ 2>/dev/null | awk 'NR==2 {{print $4}}')
echo "DISK_AVAIL_KB=${{DISK_KB:-0}}"

# Memory info (Linux)
if [ -f /proc/meminfo ]; then
    MEM_TOTAL=$(grep MemTotal /proc/meminfo 2>/dev/null | awk '{{print $2}}')
    MEM_AVAIL=$(grep MemAvailable /proc/meminfo 2>/dev/null | awk '{{print $2}}')
    echo "MEM_TOTAL_KB=${{MEM_TOTAL:-0}}"
    echo "MEM_AVAIL_KB=${{MEM_AVAIL:-0}}"
else
    # macOS - use sysctl
    if command -v sysctl &> /dev/null; then
        MEM_BYTES=$(sysctl -n hw.memsize 2>/dev/null)
        MEM_KB=$((MEM_BYTES / 1024))
        echo "MEM_TOTAL_KB=${{MEM_KB:-0}}"
        echo "MEM_AVAIL_KB=${{MEM_KB:-0}}"  # macOS doesn't have easy available mem
    fi
fi

# Agent data detection (with sizes and file counts)
PROBE_DIRS=(
{dirs}
)
for dir in "${{PROBE_DIRS[@]}}"; do
    # Expand only the leading tilde marker from our static probe list. Do not
    # eval paths: connector-owned paths can contain shell metacharacters.
    case "$dir" in
        "~") expanded_dir="$HOME" ;;
        "~/"*) expanded_dir="$HOME/${{dir#\~/}}" ;;
        *) expanded_dir="$dir" ;;
    esac
    if [ -e "$expanded_dir" ]; then
        SIZE=$(du -sm "$expanded_dir" 2>/dev/null | cut -f1)
        # Count JSONL files for session estimate
        if [ -d "$expanded_dir" ]; then
            # Keep probe bounded for very large trees: depth-limit and timeout when available.
            if command -v timeout &> /dev/null; then
                COUNT=$(timeout 5s find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            elif command -v gtimeout &> /dev/null; then
                COUNT=$(gtimeout 5s find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            else
                COUNT=$(find "$expanded_dir" -maxdepth 8 \( -name "*.jsonl" -o -name "*.json" \) 2>/dev/null | wc -l | tr -d ' ')
            fi
        else
            COUNT=1  # Single file
        fi
        echo "AGENT_DATA=$dir|${{SIZE:-0}}|${{COUNT:-0}}"
    fi
done

echo "===PROBE_END==="
"#,
        dirs = dirs_str
    )
}
376
377/// Probe a single SSH host.
378///
379/// Runs a comprehensive probe script via SSH to gather system info, cass status,
380/// and detected agent data. Uses a single SSH session for efficiency.
381///
382/// # Arguments
383/// * `host` - The discovered SSH host to probe
384/// * `timeout_secs` - Connection timeout in seconds
385///
386/// # Returns
387/// A `HostProbeResult` with all gathered information, or error details if probe failed.
388pub fn probe_host(host: &DiscoveredHost, timeout_secs: u64) -> HostProbeResult {
389    let start = Instant::now();
390    let timeout_secs = timeout_secs.max(1);
391    let command_timeout = Duration::from_secs(timeout_secs);
392
393    // Build SSH command with strict host key verification.
394    // Security-first: do not auto-trust unknown hosts during probing.
395    // Use the host alias directly (SSH config handles Port, User, IdentityFile, ProxyJump, etc.)
396    let mut cmd = Command::new("ssh");
397    cmd.args(strict_ssh_cli_tokens(timeout_secs))
398        .arg("--")
399        .arg(&host.name)
400        .arg("bash -s")
401        .stdin(Stdio::piped())
402        .stdout(Stdio::piped())
403        .stderr(Stdio::piped());
404    configure_child_process_group(&mut cmd);
405
406    // Spawn the process and write probe script to stdin
407    let mut child = match cmd.spawn() {
408        Ok(c) => c,
409        Err(e) => {
410            return HostProbeResult::unreachable(
411                &host.name,
412                format!("Failed to execute ssh: {}", e),
413            );
414        }
415    };
416
417    // Write probe script to stdin
418    let probe_script = build_probe_script();
419    let write_error = if let Some(mut stdin) = child.stdin.take() {
420        use std::io::Write;
421        stdin.write_all(probe_script.as_bytes()).err()
422    } else {
423        None
424    };
425
426    // Wait for completion
427    let output = match wait_for_child_output_with_timeout(child, command_timeout) {
428        Ok(Some(o)) => o,
429        Ok(None) => {
430            return HostProbeResult::unreachable(
431                &host.name,
432                format!("Connection timed out after {timeout_secs} seconds"),
433            );
434        }
435        Err(e) => {
436            return HostProbeResult::unreachable(&host.name, format!("SSH command failed: {}", e));
437        }
438    };
439
440    let connection_time_ms = start.elapsed().as_millis() as u64;
441
442    // Check for SSH failures
443    if !output.status.success() {
444        let stderr = String::from_utf8_lossy(&output.stderr);
445        let error_msg = if stderr.contains("Connection refused") {
446            "Connection refused".to_string()
447        } else if stderr.contains("Connection timed out") || stderr.contains("timed out") {
448            "Connection timed out".to_string()
449        } else if stderr.contains("Permission denied") {
450            "Permission denied (key not loaded in ssh-agent?)".to_string()
451        } else if is_host_key_verification_failure(&stderr) {
452            host_key_verification_error(&host.name)
453        } else if stderr.contains("No route to host") {
454            "No route to host".to_string()
455        } else {
456            format!("SSH failed: {}", stderr.trim())
457        };
458
459        return HostProbeResult::unreachable(&host.name, error_msg);
460    }
461    if let Some(e) = write_error {
462        return HostProbeResult::unreachable(
463            &host.name,
464            format!("Failed to write probe script: {}", e),
465        );
466    }
467
468    // Parse successful output
469    let stdout = String::from_utf8_lossy(&output.stdout);
470    parse_probe_output(&host.name, &stdout, connection_time_ms)
471}
472
473/// Parse the probe script output into a HostProbeResult.
474fn parse_probe_output(host_name: &str, output: &str, connection_time_ms: u64) -> HostProbeResult {
475    let mut values: HashMap<String, String> = HashMap::new();
476    let mut agent_data: Vec<(String, u64, u64)> = Vec::new(); // (path, size_mb, count)
477
478    // Parse only key=value pairs emitted by the probe script itself. SSH login
479    // banners, forced-command wrappers, or shell noise can appear before or
480    // after the markers and must not override the measured values.
481    let mut inside_probe = false;
482    let mut saw_start = false;
483    let mut saw_end = false;
484    for line in output.lines() {
485        let line = line.trim();
486        if line == "===PROBE_START===" {
487            if saw_start {
488                return HostProbeResult::unreachable(host_name, "Probe script output malformed");
489            }
490            saw_start = true;
491            inside_probe = true;
492            continue;
493        }
494        if line == "===PROBE_END===" {
495            if !inside_probe {
496                return HostProbeResult::unreachable(host_name, "Probe script output malformed");
497            }
498            saw_end = true;
499            break;
500        }
501        if !inside_probe || line.is_empty() || line.starts_with("===") {
502            continue;
503        }
504
505        if line.starts_with("AGENT_DATA=") {
506            // Special handling for agent data: AGENT_DATA=path|size|count
507            if let Some(data) = line.strip_prefix("AGENT_DATA=") {
508                // Use rsplitn to handle paths containing pipes (parse from right)
509                // Yields: count, size, path
510                let parts: Vec<&str> = data.rsplitn(3, '|').collect();
511                if parts.len() == 3 {
512                    let count = parts[0].parse().unwrap_or(0);
513                    let size = parts[1].parse().unwrap_or(0);
514                    let path = parts[2].to_string();
515                    agent_data.push((path, size, count));
516                }
517            }
518        } else if let Some((key, value)) = line.split_once('=') {
519            values.insert(key.to_string(), value.to_string());
520        }
521    }
522
523    if !saw_start || !saw_end {
524        return HostProbeResult::unreachable(host_name, "Probe script output malformed");
525    }
526
527    // Build CassStatus
528    let cass_status = if let Some(version) = values.get("CASS_VERSION") {
529        if version == "NOT_FOUND" {
530            CassStatus::NotFound
531        } else {
532            let health = values.get("CASS_HEALTH").map(|s| s.as_str());
533            if health == Some("OK") {
534                let sessions = values
535                    .get("CASS_SESSIONS")
536                    .and_then(|s| s.parse().ok())
537                    .unwrap_or(0);
538                CassStatus::Indexed {
539                    version: version.clone(),
540                    session_count: sessions,
541                    last_indexed: None,
542                }
543            } else {
544                CassStatus::InstalledNotIndexed {
545                    version: version.clone(),
546                }
547            }
548        }
549    } else {
550        CassStatus::Unknown
551    };
552
553    // Build SystemInfo
554    let system_info = values.get("OS").map(|os| SystemInfo {
555        os: os.clone(),
556        arch: values.get("ARCH").cloned().unwrap_or_default(),
557        distro: values.get("DISTRO").cloned(),
558        has_cargo: values.get("HAS_CARGO").map(|v| v == "1").unwrap_or(false),
559        has_cargo_binstall: values
560            .get("HAS_BINSTALL")
561            .map(|v| v == "1")
562            .unwrap_or(false),
563        has_curl: values.get("HAS_CURL").map(|v| v == "1").unwrap_or(false),
564        has_wget: values.get("HAS_WGET").map(|v| v == "1").unwrap_or(false),
565        remote_home: values.get("HOME").cloned().unwrap_or_default(),
566        machine_id: values.get("MACHINE_ID").cloned().filter(|s| !s.is_empty()),
567    });
568
569    // Build ResourceInfo
570    let resources = {
571        let disk_kb = values
572            .get("DISK_AVAIL_KB")
573            .and_then(|s| s.parse::<u64>().ok())
574            .unwrap_or(0);
575        let mem_total_kb = values
576            .get("MEM_TOTAL_KB")
577            .and_then(|s| s.parse::<u64>().ok())
578            .unwrap_or(0);
579        let mem_avail_kb = values
580            .get("MEM_AVAIL_KB")
581            .and_then(|s| s.parse::<u64>().ok())
582            .unwrap_or(0);
583
584        if disk_kb > 0 || mem_total_kb > 0 {
585            let disk_mb = disk_kb / 1024;
586            let mem_total_mb = mem_total_kb / 1024;
587            let mem_avail_mb = mem_avail_kb / 1024;
588
589            Some(ResourceInfo {
590                disk_available_mb: disk_mb,
591                memory_total_mb: mem_total_mb,
592                memory_available_mb: mem_avail_mb,
593                can_compile: disk_mb >= ResourceInfo::MIN_DISK_MB
594                    && mem_total_mb >= ResourceInfo::MIN_MEMORY_MB,
595            })
596        } else {
597            None
598        }
599    };
600
601    // Build DetectedAgents
602    let detected_agents: Vec<DetectedAgent> = agent_data
603        .into_iter()
604        .map(|(path, size_mb, count)| {
605            let agent_type = infer_agent_type(&path);
606            DetectedAgent {
607                agent_type,
608                path,
609                estimated_sessions: Some(count),
610                estimated_size_mb: Some(size_mb),
611            }
612        })
613        .collect();
614
615    HostProbeResult {
616        host_name: host_name.to_string(),
617        reachable: true,
618        connection_time_ms,
619        cass_status,
620        detected_agents,
621        system_info,
622        resources,
623        error: None,
624    }
625}
626
/// Map a probe path to the slug of the agent that owns it.
///
/// Rules are tried in table order and the first match wins, so more specific
/// patterns must come before broader ones (e.g. `saoudrizwan.claude-dev`
/// contains `.claude`, so Cline is listed ahead of Claude Code). Returns
/// `"unknown"` when no rule matches.
fn infer_agent_type(path: &str) -> String {
    // (agent slug, substrings matched anywhere, suffixes matched at the end)
    const RULES: &[(&str, &[&str], &[&str])] = &[
        (
            "cline",
            &["saoudrizwan.claude-dev", "rooveterinaryinc.roo-cline"],
            &[],
        ),
        ("claude_code", &[".claude"], &[]),
        ("codex", &[".codex"], &[]),
        ("cursor", &[".cursor", "Cursor"], &[]),
        ("gemini", &[".gemini"], &[]),
        ("pi_agent", &["/.pi/"], &["/.pi"]),
        ("aider", &[".aider"], &[]),
        ("opencode", &["opencode"], &[]),
        ("goose", &[".goose"], &[]),
        ("copilot", &["copilot-chat", "gh-copilot", "gh/copilot"], &[]),
        ("continue", &[".continue"], &[]),
        ("amp", &["sourcegraph.amp", "/amp/"], &["/amp"]),
        ("clawdbot", &[".clawdbot"], &[]),
        ("factory", &[".factory"], &[]),
        ("vibe", &[".vibe"], &[]),
        ("windsurf", &[".windsurf"], &[]),
    ];

    RULES
        .iter()
        .find(|(_, substrings, suffixes)| {
            substrings.iter().any(|needle| path.contains(*needle))
                || suffixes.iter().any(|tail| path.ends_with(*tail))
        })
        .map_or("unknown", |(slug, _, _)| *slug)
        .to_string()
}
672
673/// Probe multiple hosts in parallel.
674///
675/// Uses rayon's parallel iterator to probe hosts concurrently, calling the
676/// progress callback as each probe completes.
677///
678/// # Arguments
679/// * `hosts` - Slice of discovered hosts to probe
680/// * `timeout_secs` - Connection timeout per host
681/// * `on_progress` - Callback called after each host completes: (completed, total, host_name)
682///
683/// # Returns
684/// Vector of probe results for all hosts.
685pub fn probe_hosts_parallel<F>(
686    hosts: &[DiscoveredHost],
687    timeout_secs: u64,
688    on_progress: F,
689) -> Vec<HostProbeResult>
690where
691    F: Fn(usize, usize, &str) + Send + Sync,
692{
693    use rayon::prelude::*;
694    use std::sync::Arc;
695    use std::sync::atomic::{AtomicUsize, Ordering};
696
697    let total = hosts.len();
698    let completed = Arc::new(AtomicUsize::new(0));
699    let on_progress = Arc::new(on_progress);
700
701    // Use rayon for true parallel execution
702    hosts
703        .par_iter()
704        .map(|host| {
705            let result = probe_host(host, timeout_secs);
706
707            let done = completed.fetch_add(1, Ordering::SeqCst) + 1;
708            on_progress(done, total, &host.name);
709
710            result
711        })
712        .collect()
713}
714
/// Cache for probe results to avoid repeated probing.
///
/// Note: Use `ProbeCache::new(ttl_secs)` to create a cache. The `Default`
/// implementation uses a 5-minute TTL.
///
/// Expired entries are hidden by `get` but stay in memory until
/// `clear_expired` is called or `insert` overwrites them.
#[derive(Debug)]
pub struct ProbeCache {
    /// Cached results keyed by host name, each paired with its insertion time.
    results: HashMap<String, (HostProbeResult, std::time::Instant)>,
    /// Entry lifetime in whole seconds.
    ttl_secs: u64,
}
724
725impl Default for ProbeCache {
726    fn default() -> Self {
727        Self::new(300) // 5-minute default TTL
728    }
729}
730
731impl ProbeCache {
732    /// Create a new cache with the specified TTL in seconds.
733    pub fn new(ttl_secs: u64) -> Self {
734        Self {
735            results: HashMap::new(),
736            ttl_secs,
737        }
738    }
739
740    /// Get a cached result if still valid.
741    pub fn get(&self, host_name: &str) -> Option<&HostProbeResult> {
742        self.results.get(host_name).and_then(|(result, ts)| {
743            if ts.elapsed().as_secs() < self.ttl_secs {
744                Some(result)
745            } else {
746                None
747            }
748        })
749    }
750
751    /// Insert a result into the cache.
752    pub fn insert(&mut self, result: HostProbeResult) {
753        self.results.insert(
754            result.host_name.clone(),
755            (result, std::time::Instant::now()),
756        );
757    }
758
759    /// Clear expired entries.
760    pub fn clear_expired(&mut self) {
761        self.results
762            .retain(|_, (_, ts)| ts.elapsed().as_secs() < self.ttl_secs);
763    }
764}
765
766/// Deduplicate probe results that point to the same physical machine.
767///
768/// Multiple SSH aliases may point to the same machine. This function identifies
769/// duplicates using the machine_id from the probe and keeps only one entry per
770/// physical machine.
771///
772/// # Selection criteria (when duplicates found)
773/// 1. Prefer hosts with cass already installed
774/// 2. Prefer hosts with more sessions indexed
775/// 3. Otherwise, keep the first one alphabetically
776///
777/// # Returns
778/// A tuple of (deduplicated results, merged aliases map).
779/// The merged map contains: kept_host_name -> vec![merged_alias_names]
780pub fn deduplicate_probe_results(
781    results: Vec<HostProbeResult>,
782) -> (Vec<HostProbeResult>, HashMap<String, Vec<String>>) {
783    // Group by machine_id (skip hosts without machine_id - can't dedupe them)
784    let mut by_machine_id: HashMap<String, Vec<HostProbeResult>> = HashMap::new();
785    let mut no_machine_id: Vec<HostProbeResult> = Vec::new();
786
787    for result in results {
788        if let Some(ref machine_id) = result
789            .system_info
790            .as_ref()
791            .and_then(|s| s.machine_id.clone())
792        {
793            by_machine_id
794                .entry(machine_id.clone())
795                .or_default()
796                .push(result);
797        } else {
798            no_machine_id.push(result);
799        }
800    }
801
802    let mut deduplicated: Vec<HostProbeResult> = Vec::new();
803    let mut merged_aliases: HashMap<String, Vec<String>> = HashMap::new();
804
805    // Process groups with machine_id
806    for (_machine_id, mut group) in by_machine_id {
807        if group.len() == 1 {
808            deduplicated.push(group.remove(0));
809        } else {
810            // Multiple aliases for same machine - pick the best one
811            group.sort_by(|a, b| {
812                // 1. Prefer installed cass
813                let a_installed = a.cass_status.is_installed();
814                let b_installed = b.cass_status.is_installed();
815                if a_installed != b_installed {
816                    return b_installed.cmp(&a_installed);
817                }
818
819                // 2. Prefer more sessions
820                let a_sessions = match &a.cass_status {
821                    CassStatus::Indexed { session_count, .. } => *session_count,
822                    _ => 0,
823                };
824                let b_sessions = match &b.cass_status {
825                    CassStatus::Indexed { session_count, .. } => *session_count,
826                    _ => 0,
827                };
828                if a_sessions != b_sessions {
829                    return b_sessions.cmp(&a_sessions);
830                }
831
832                // 3. Alphabetically by name
833                a.host_name.cmp(&b.host_name)
834            });
835
836            // Keep the first (best) one, record others as merged
837            let kept = group.remove(0);
838            let merged: Vec<String> = group.into_iter().map(|h| h.host_name).collect();
839
840            if !merged.is_empty() {
841                merged_aliases.insert(kept.host_name.clone(), merged);
842            }
843            deduplicated.push(kept);
844        }
845    }
846
847    // Add back hosts without machine_id
848    deduplicated.extend(no_machine_id);
849
850    // Sort final list by name for consistent ordering
851    deduplicated.sort_by(|a, b| a.host_name.cmp(&b.host_name));
852
853    (deduplicated, merged_aliases)
854}
855
856#[cfg(test)]
857mod tests {
858    use super::*;
859
860    #[test]
861    fn test_cass_status_is_installed() {
862        assert!(
863            CassStatus::Indexed {
864                version: "0.1.50".into(),
865                session_count: 100,
866                last_indexed: None
867            }
868            .is_installed()
869        );
870
871        assert!(
872            CassStatus::InstalledNotIndexed {
873                version: "0.1.50".into()
874            }
875            .is_installed()
876        );
877
878        assert!(!CassStatus::NotFound.is_installed());
879        assert!(!CassStatus::Unknown.is_installed());
880    }
881
882    #[test]
883    fn test_cass_status_version() {
884        assert_eq!(
885            CassStatus::Indexed {
886                version: "0.1.50".into(),
887                session_count: 0,
888                last_indexed: None
889            }
890            .version(),
891            Some("0.1.50")
892        );
893
894        assert_eq!(
895            CassStatus::InstalledNotIndexed {
896                version: "0.1.49".into()
897            }
898            .version(),
899            Some("0.1.49")
900        );
901
902        assert_eq!(CassStatus::NotFound.version(), None);
903    }
904
905    #[test]
906    fn test_infer_agent_type() {
907        assert_eq!(infer_agent_type("~/.claude/projects"), "claude_code");
908        assert_eq!(infer_agent_type("~/.codex/sessions"), "codex");
909        assert_eq!(infer_agent_type("~/.cursor"), "cursor");
910        assert_eq!(infer_agent_type("~/.gemini/tmp"), "gemini");
911        assert_eq!(
912            infer_agent_type("~/.config/Code/User/globalStorage/saoudrizwan.claude-dev"),
913            "cline"
914        );
915        assert_eq!(
916            infer_agent_type("~/.config/Code/User/globalStorage/github.copilot-chat"),
917            "copilot"
918        );
919        assert_eq!(infer_agent_type("~/.config/gh-copilot"), "copilot");
920        assert_eq!(infer_agent_type("/some/random/path"), "unknown");
921    }
922
923    #[test]
924    fn test_parse_probe_output_success() {
925        let output = r#"
926===PROBE_START===
927OS=linux
928ARCH=x86_64
929HOME=/home/user
930DISTRO=Ubuntu 22.04
931CASS_VERSION=0.1.50
932CASS_HEALTH=OK
933CASS_SESSIONS=1234
934HAS_CARGO=1
935HAS_BINSTALL=0
936HAS_CURL=1
937HAS_WGET=1
938DISK_AVAIL_KB=52428800
939MEM_TOTAL_KB=16777216
940MEM_AVAIL_KB=8388608
941AGENT_DATA=~/.claude/projects|150|42
942AGENT_DATA=~/.codex/sessions|50|10
943===PROBE_END===
944"#;
945
946        let result = parse_probe_output("test-host", output, 100);
947
948        assert!(result.reachable);
949        assert_eq!(result.host_name, "test-host");
950        assert_eq!(result.connection_time_ms, 100);
951
952        // Check cass status
953        assert!(
954            matches!(&result.cass_status, CassStatus::Indexed { .. }),
955            "expected Indexed status"
956        );
957        if let CassStatus::Indexed {
958            version,
959            session_count,
960            ..
961        } = &result.cass_status
962        {
963            assert_eq!(version, "0.1.50");
964            assert_eq!(*session_count, 1234);
965        }
966
967        // Check system info
968        let sys = result.system_info.as_ref().unwrap();
969        assert_eq!(sys.os, "linux");
970        assert_eq!(sys.arch, "x86_64");
971        assert_eq!(sys.distro, Some("Ubuntu 22.04".into()));
972        assert!(sys.has_cargo);
973        assert!(!sys.has_cargo_binstall);
974        assert!(sys.has_curl);
975
976        // Check resources
977        let res = result.resources.as_ref().unwrap();
978        assert_eq!(res.disk_available_mb, 51200); // 52428800 / 1024
979        assert_eq!(res.memory_total_mb, 16384); // 16777216 / 1024
980        assert!(res.can_compile);
981
982        // Check detected agents
983        assert_eq!(result.detected_agents.len(), 2);
984        assert_eq!(result.detected_agents[0].agent_type, "claude_code");
985        assert_eq!(result.detected_agents[0].estimated_sessions, Some(42));
986        assert_eq!(result.detected_agents[1].agent_type, "codex");
987    }
988
989    #[test]
990    fn test_parse_probe_output_ignores_noise_outside_markers() {
991        let output = r#"
992CASS_VERSION=NOT_FOUND
993AGENT_DATA=/tmp/outside-before|999|999
994===PROBE_START===
995OS=linux
996ARCH=x86_64
997HOME=/home/user
998CASS_VERSION=0.4.2
999CASS_HEALTH=OK
1000CASS_SESSIONS=7
1001HAS_CARGO=1
1002HAS_BINSTALL=0
1003HAS_CURL=1
1004HAS_WGET=1
1005DISK_AVAIL_KB=2048000
1006MEM_TOTAL_KB=4096000
1007MEM_AVAIL_KB=1024000
1008===PROBE_END===
1009CASS_VERSION=NOT_FOUND
1010AGENT_DATA=/tmp/outside-after|999|999
1011"#;
1012
1013        let result = parse_probe_output("noisy-host", output, 42);
1014
1015        assert!(result.reachable);
1016        assert!(result.detected_agents.is_empty());
1017        assert!(matches!(
1018            result.cass_status,
1019            CassStatus::Indexed {
1020                ref version,
1021                session_count: 7,
1022                ..
1023            } if version == "0.4.2"
1024        ));
1025    }
1026
1027    #[test]
1028    fn test_parse_probe_output_cass_not_found() {
1029        let output = r#"
1030===PROBE_START===
1031OS=darwin
1032ARCH=arm64
1033HOME=/Users/user
1034CASS_VERSION=NOT_FOUND
1035HAS_CARGO=0
1036HAS_BINSTALL=0
1037HAS_CURL=1
1038HAS_WGET=0
1039DISK_AVAIL_KB=10240000
1040MEM_TOTAL_KB=8388608
1041MEM_AVAIL_KB=4194304
1042===PROBE_END===
1043"#;
1044
1045        let result = parse_probe_output("mac-host", output, 50);
1046
1047        assert!(result.reachable);
1048        assert!(matches!(result.cass_status, CassStatus::NotFound));
1049
1050        let sys = result.system_info.as_ref().unwrap();
1051        assert_eq!(sys.os, "darwin");
1052        assert_eq!(sys.arch, "arm64");
1053        assert!(!sys.has_cargo);
1054    }
1055
1056    #[test]
1057    fn test_parse_probe_output_malformed() {
1058        let output = "random garbage";
1059        let result = parse_probe_output("bad-host", output, 0);
1060
1061        assert!(!result.reachable);
1062        assert!(result.error.is_some());
1063    }
1064
1065    #[test]
1066    fn test_parse_probe_output_rejects_out_of_order_markers() {
1067        let output = r#"
1068===PROBE_END===
1069===PROBE_START===
1070OS=linux
1071CASS_VERSION=0.4.2
1072"#;
1073        let result = parse_probe_output("bad-host", output, 0);
1074
1075        assert!(!result.reachable);
1076        assert!(result.error.is_some());
1077    }
1078
1079    #[test]
1080    fn test_host_probe_result_unreachable() {
1081        let result = HostProbeResult::unreachable("test", "Connection refused");
1082
1083        assert!(!result.reachable);
1084        assert_eq!(result.error, Some("Connection refused".into()));
1085        assert!(!result.has_cass());
1086        assert!(!result.has_agent_data());
1087    }
1088
1089    #[test]
1090    fn test_probe_cache() {
1091        let mut cache = ProbeCache::new(300); // 5 minute TTL
1092
1093        let result = HostProbeResult {
1094            host_name: "test".into(),
1095            reachable: true,
1096            connection_time_ms: 100,
1097            cass_status: CassStatus::NotFound,
1098            detected_agents: vec![],
1099            system_info: None,
1100            resources: None,
1101            error: None,
1102        };
1103
1104        cache.insert(result);
1105
1106        assert!(cache.get("test").is_some());
1107        assert!(cache.get("nonexistent").is_none());
1108    }
1109
1110    #[test]
1111    fn test_resource_info_can_compile() {
1112        let good = ResourceInfo {
1113            disk_available_mb: 2000,
1114            memory_total_mb: 4000,
1115            memory_available_mb: 2000,
1116            can_compile: true,
1117        };
1118        assert!(good.can_compile);
1119
1120        let low_disk = ResourceInfo {
1121            disk_available_mb: 500,
1122            memory_total_mb: 4000,
1123            memory_available_mb: 2000,
1124            can_compile: false,
1125        };
1126        assert!(!low_disk.can_compile);
1127    }
1128
1129    // =========================================================================
1130    // Real system probe tests — run PROBE_SCRIPT locally without SSH
1131    // =========================================================================
1132
1133    /// Execute a probe script on the local system via bash, returning stdout.
1134    fn run_probe_script_with_home(script: &str, home: Option<&std::path::Path>) -> String {
1135        use std::io::Write;
1136        let mut cmd = Command::new("bash");
1137        cmd.arg("-s")
1138            .stdin(Stdio::piped())
1139            .stdout(Stdio::piped())
1140            .stderr(Stdio::piped());
1141        if let Some(home) = home {
1142            cmd.env("HOME", home);
1143        } else if dotenvy::var("HOME").is_err()
1144            && let Some(dirs) = directories::BaseDirs::new()
1145        {
1146            // Ensure HOME is set for the probe script (may not be set in some test environments).
1147            cmd.env("HOME", dirs.home_dir());
1148        }
1149        let mut child = cmd.spawn().expect("bash should be available");
1150        if let Some(mut stdin) = child.stdin.take() {
1151            stdin
1152                .write_all(script.as_bytes())
1153                .expect("write probe script");
1154        }
1155        let output = child
1156            .wait_with_output()
1157            .expect("probe script should finish");
1158        assert!(
1159            output.status.success(),
1160            "probe script failed: {:?}",
1161            String::from_utf8_lossy(&output.stderr)
1162        );
1163        String::from_utf8_lossy(&output.stdout).to_string()
1164    }
1165
1166    /// Execute PROBE_SCRIPT on the local system via bash, returning stdout.
1167    fn run_probe_script_locally() -> String {
1168        run_probe_script_with_home(&build_probe_script(), None)
1169    }
1170
1171    #[test]
1172    fn shell_single_quote_arg_quotes_shell_metacharacters() {
1173        assert_eq!(shell_single_quote_arg("plain/path"), "'plain/path'");
1174        assert_eq!(shell_single_quote_arg("can't"), "'can'\\''t'");
1175        assert_eq!(
1176            shell_single_quote_arg("$(touch /tmp/nope); `whoami`"),
1177            "'$(touch /tmp/nope); `whoami`'"
1178        );
1179    }
1180
1181    #[test]
1182    fn probe_script_uses_literal_array_without_eval() {
1183        let script = build_probe_script();
1184        assert!(script.contains("PROBE_DIRS=("));
1185        assert!(script.contains("for dir in \"${PROBE_DIRS[@]}\""));
1186        assert!(script.contains("expanded_dir=\"$HOME/${dir#\\~/}\""));
1187        assert!(
1188            !script.contains("eval echo"),
1189            "probe paths must not be expanded through eval"
1190        );
1191    }
1192
1193    #[test]
1194    #[cfg(not(windows))]
1195    fn probe_script_treats_special_probe_paths_as_literals() {
1196        let home = tempfile::tempdir().expect("temp home");
1197        let relative_path =
1198            "Library/Application Support/Codex$(touch \"$HOME/SHOULD_NOT_EXIST\");can't";
1199        std::fs::create_dir_all(home.path().join(relative_path)).expect("create special path");
1200
1201        let probe_path = format!("~/{relative_path}");
1202        let script = build_probe_script_for_dirs(std::slice::from_ref(&probe_path));
1203        let output = run_probe_script_with_home(&script, Some(home.path()));
1204
1205        assert!(
1206            output.contains(&format!("AGENT_DATA={probe_path}|")),
1207            "special probe path should be reported literally: {output}"
1208        );
1209        assert!(
1210            !home.path().join("SHOULD_NOT_EXIST").exists(),
1211            "probe path interpolation must not execute command substitutions"
1212        );
1213
1214        let result = parse_probe_output("localhost", &output, 0);
1215        assert!(
1216            result
1217                .detected_agents
1218                .iter()
1219                .any(|agent| agent.path == probe_path),
1220            "parsed agent data should preserve literal path"
1221        );
1222    }
1223
1224    #[test]
1225    #[cfg(not(windows))]
1226    fn real_probe_script_produces_valid_markers() {
1227        let output = run_probe_script_locally();
1228        assert!(
1229            output.contains("===PROBE_START==="),
1230            "missing PROBE_START marker"
1231        );
1232        assert!(
1233            output.contains("===PROBE_END==="),
1234            "missing PROBE_END marker"
1235        );
1236    }
1237
1238    #[test]
1239    #[cfg(not(windows))]
1240    fn real_probe_script_parses_into_reachable_result() {
1241        let output = run_probe_script_locally();
1242        let result = parse_probe_output("localhost", &output, 0);
1243        assert!(
1244            result.reachable,
1245            "local probe should be reachable: {:?}",
1246            result.error
1247        );
1248        assert!(result.system_info.is_some(), "should have system info");
1249        assert!(result.resources.is_some(), "should have resource info");
1250    }
1251
1252    #[test]
1253    #[cfg(not(windows))]
1254    fn real_probe_system_info_has_valid_os() {
1255        let output = run_probe_script_locally();
1256        let result = parse_probe_output("localhost", &output, 0);
1257        let sys = result.system_info.as_ref().expect("system_info");
1258        assert!(
1259            sys.os == "linux" || sys.os == "darwin",
1260            "OS should be linux or darwin, got: {}",
1261            sys.os
1262        );
1263    }
1264
1265    #[test]
1266    #[cfg(not(windows))]
1267    fn real_probe_system_info_has_valid_arch() {
1268        let output = run_probe_script_locally();
1269        let result = parse_probe_output("localhost", &output, 0);
1270        let sys = result.system_info.as_ref().expect("system_info");
1271        let valid_archs = [
1272            "x86_64", "aarch64", "arm64", "armv7l", "i686", "s390x", "ppc64le",
1273        ];
1274        assert!(
1275            valid_archs.contains(&sys.arch.as_str()),
1276            "arch should be a known value, got: {}",
1277            sys.arch
1278        );
1279    }
1280
1281    #[test]
1282    #[cfg(not(windows))]
1283    fn real_probe_system_info_has_nonempty_home() {
1284        let output = run_probe_script_locally();
1285        let result = parse_probe_output("localhost", &output, 0);
1286        let sys = result.system_info.as_ref().expect("system_info");
1287        assert!(!sys.remote_home.is_empty(), "home should not be empty");
1288        assert!(
1289            sys.remote_home.starts_with('/'),
1290            "home should be absolute: {}",
1291            sys.remote_home
1292        );
1293    }
1294
1295    #[test]
1296    #[cfg(not(windows))]
1297    fn real_probe_resources_have_nonzero_disk() {
1298        let output = run_probe_script_locally();
1299        let result = parse_probe_output("localhost", &output, 0);
1300        let res = result.resources.as_ref().expect("resources");
1301        assert!(res.disk_available_mb > 0, "disk_available_mb should be > 0");
1302    }
1303
1304    #[test]
1305    #[cfg(not(windows))]
1306    fn real_probe_resources_have_nonzero_memory() {
1307        let output = run_probe_script_locally();
1308        let result = parse_probe_output("localhost", &output, 0);
1309        let res = result.resources.as_ref().expect("resources");
1310        assert!(res.memory_total_mb > 0, "memory_total_mb should be > 0");
1311        assert!(
1312            res.memory_available_mb > 0,
1313            "memory_available_mb should be > 0"
1314        );
1315    }
1316
1317    #[test]
1318    #[cfg(not(windows))]
1319    fn real_probe_resources_memory_invariant() {
1320        let output = run_probe_script_locally();
1321        let result = parse_probe_output("localhost", &output, 0);
1322        let res = result.resources.as_ref().expect("resources");
1323        assert!(
1324            res.memory_available_mb <= res.memory_total_mb,
1325            "available memory ({}) should not exceed total ({})",
1326            res.memory_available_mb,
1327            res.memory_total_mb
1328        );
1329    }
1330
1331    #[test]
1332    #[cfg(not(windows))]
1333    fn real_probe_resources_can_compile_reflects_thresholds() {
1334        let output = run_probe_script_locally();
1335        let result = parse_probe_output("localhost", &output, 0);
1336        let res = result.resources.as_ref().expect("resources");
1337        let expected = res.disk_available_mb >= ResourceInfo::MIN_DISK_MB
1338            && res.memory_total_mb >= ResourceInfo::MIN_MEMORY_MB;
1339        assert_eq!(
1340            res.can_compile, expected,
1341            "can_compile should match threshold check: disk={}MB mem={}MB",
1342            res.disk_available_mb, res.memory_total_mb
1343        );
1344    }
1345
1346    #[test]
1347    #[cfg(not(windows))]
1348    fn real_probe_tool_detection_is_consistent() {
1349        let output = run_probe_script_locally();
1350        let result = parse_probe_output("localhost", &output, 0);
1351        let sys = result.system_info.as_ref().expect("system_info");
1352        // If cargo-binstall is available, cargo must also be available
1353        if sys.has_cargo_binstall {
1354            assert!(sys.has_cargo, "binstall requires cargo");
1355        }
1356        // At least one download tool should exist on any modern system
1357        assert!(
1358            sys.has_curl || sys.has_wget,
1359            "system should have at least curl or wget"
1360        );
1361    }
1362
1363    #[test]
1364    fn probe_script_contains_all_franken_agent_detection_paths() {
1365        let script = build_probe_script();
1366        // Verify key agent paths from franken_agent_detection are present
1367        assert!(script.contains("~/.claude"), "missing claude paths");
1368        assert!(script.contains("~/.codex/sessions"), "missing codex path");
1369        assert!(script.contains("~/.gemini"), "missing gemini paths");
1370        assert!(script.contains("~/.goose/sessions"), "missing goose path");
1371        assert!(
1372            script.contains("~/.continue/sessions"),
1373            "missing continue path"
1374        );
1375        assert!(script.contains("~/.aider"), "missing aider path");
1376        assert!(
1377            script.contains("saoudrizwan.claude-dev"),
1378            "missing cline path"
1379        );
1380        assert!(script.contains("copilot-chat"), "missing copilot path");
1381        assert!(script.contains("~/.windsurf"), "missing windsurf path");
1382        assert!(script.contains("~/.factory"), "missing factory path");
1383        assert!(script.contains("~/.clawdbot"), "missing clawdbot path");
1384        assert!(script.contains("~/.vibe"), "missing vibe path");
1385        assert!(script.contains("sourcegraph.amp"), "missing amp path");
1386        // Verify script structure
1387        assert!(script.contains("===PROBE_START==="));
1388        assert!(script.contains("===PROBE_END==="));
1389        assert!(script.contains("for dir in \"${PROBE_DIRS[@]}\""));
1390    }
1391
1392    #[test]
1393    fn infer_agent_type_covers_all_dynamic_agents() {
1394        // Ensure infer_agent_type handles all agents from franken_agent_detection
1395        assert_eq!(infer_agent_type("~/.goose/sessions"), "goose");
1396        assert_eq!(infer_agent_type("~/.continue/sessions"), "continue");
1397        assert_eq!(infer_agent_type("~/.clawdbot/sessions"), "clawdbot");
1398        assert_eq!(infer_agent_type("~/.factory/sessions"), "factory");
1399        assert_eq!(infer_agent_type("~/.vibe/logs/session"), "vibe");
1400        assert_eq!(infer_agent_type("~/.windsurf"), "windsurf");
1401        assert_eq!(
1402            infer_agent_type("~/.config/Code/User/globalStorage/sourcegraph.amp"),
1403            "amp"
1404        );
1405        assert_eq!(infer_agent_type("~/.pi/agent/sessions"), "pi_agent");
1406    }
1407
1408    // =========================================================================
1409    // Deduplication tests
1410    // =========================================================================
1411
1412    fn make_probe_result(
1413        name: &str,
1414        machine_id: Option<&str>,
1415        sessions: Option<u64>,
1416    ) -> HostProbeResult {
1417        HostProbeResult {
1418            host_name: name.to_string(),
1419            reachable: true,
1420            connection_time_ms: 100,
1421            cass_status: if let Some(s) = sessions {
1422                CassStatus::Indexed {
1423                    version: "0.1.50".into(),
1424                    session_count: s,
1425                    last_indexed: None,
1426                }
1427            } else {
1428                CassStatus::NotFound
1429            },
1430            detected_agents: vec![],
1431            system_info: Some(SystemInfo {
1432                os: "linux".into(),
1433                arch: "x86_64".into(),
1434                distro: Some("Ubuntu 25.10".into()),
1435                has_cargo: true,
1436                has_cargo_binstall: false,
1437                has_curl: true,
1438                has_wget: true,
1439                remote_home: "/home/ubuntu".into(),
1440                machine_id: machine_id.map(String::from),
1441            }),
1442            resources: Some(ResourceInfo {
1443                disk_available_mb: 800_000,
1444                memory_total_mb: 16_000,
1445                memory_available_mb: 8_000,
1446                can_compile: true,
1447            }),
1448            error: None,
1449        }
1450    }
1451
1452    #[test]
1453    fn test_deduplicate_no_duplicates() {
1454        let results = vec![
1455            make_probe_result("host1", Some("machine-1"), Some(100)),
1456            make_probe_result("host2", Some("machine-2"), Some(200)),
1457        ];
1458
1459        let (deduped, merged) = deduplicate_probe_results(results);
1460
1461        assert_eq!(deduped.len(), 2);
1462        assert!(merged.is_empty());
1463    }
1464
1465    #[test]
1466    fn test_deduplicate_same_machine() {
1467        // Two SSH aliases for the same machine
1468        let results = vec![
1469            make_probe_result("jain", Some("abc123"), None),
1470            make_probe_result("jain_ovh_box", Some("abc123"), None),
1471        ];
1472
1473        let (deduped, merged) = deduplicate_probe_results(results);
1474
1475        assert_eq!(deduped.len(), 1);
1476        // Should keep "jain" (alphabetically first since neither has cass)
1477        assert_eq!(deduped[0].host_name, "jain");
1478        assert_eq!(
1479            merged.get("jain").unwrap(),
1480            &vec!["jain_ovh_box".to_string()]
1481        );
1482    }
1483
1484    #[test]
1485    fn test_deduplicate_prefers_installed_cass() {
1486        // Two aliases, one with cass installed
1487        let results = vec![
1488            make_probe_result("alias_a", Some("machine-x"), None), // no cass
1489            make_probe_result("alias_b", Some("machine-x"), Some(500)), // has cass
1490        ];
1491
1492        let (deduped, merged) = deduplicate_probe_results(results);
1493
1494        assert_eq!(deduped.len(), 1);
1495        // Should keep alias_b because it has cass installed
1496        assert_eq!(deduped[0].host_name, "alias_b");
1497        assert!(merged.contains_key("alias_b"));
1498    }
1499
1500    #[test]
1501    fn test_deduplicate_prefers_more_sessions() {
1502        // Both have cass, but different session counts
1503        let results = vec![
1504            make_probe_result("host_low", Some("machine-y"), Some(50)),
1505            make_probe_result("host_high", Some("machine-y"), Some(500)),
1506        ];
1507
1508        let (deduped, merged) = deduplicate_probe_results(results);
1509
1510        assert_eq!(deduped.len(), 1);
1511        // Should keep host_high because it has more sessions
1512        assert_eq!(deduped[0].host_name, "host_high");
1513        // Verify the merge recorded the merged alias
1514        assert!(merged.contains_key("host_high"));
1515    }
1516
1517    #[test]
1518    fn test_deduplicate_no_machine_id_not_merged() {
1519        // Hosts without machine_id should not be merged
1520        let results = vec![
1521            make_probe_result("host1", None, Some(100)),
1522            make_probe_result("host2", None, Some(200)),
1523        ];
1524
1525        let (deduped, merged) = deduplicate_probe_results(results);
1526
1527        assert_eq!(deduped.len(), 2);
1528        assert!(merged.is_empty());
1529    }
1530
1531    #[test]
1532    fn test_deduplicate_mixed_with_and_without_machine_id() {
1533        let results = vec![
1534            make_probe_result("aliasA", Some("same-machine"), Some(100)),
1535            make_probe_result("aliasB", Some("same-machine"), Some(50)),
1536            make_probe_result("standalone", None, Some(75)),
1537        ];
1538
1539        let (deduped, merged) = deduplicate_probe_results(results);
1540
1541        // 2 hosts: one from deduplication, one standalone
1542        assert_eq!(deduped.len(), 2);
1543        // aliasA should be kept (more sessions)
1544        assert!(deduped.iter().any(|h| h.host_name == "aliasA"));
1545        assert!(deduped.iter().any(|h| h.host_name == "standalone"));
1546        assert_eq!(merged.len(), 1);
1547    }
1548
1549    #[test]
1550    fn test_deduplicate_three_aliases_same_machine() {
1551        let results = vec![
1552            make_probe_result("alias1", Some("same"), Some(100)),
1553            make_probe_result("alias2", Some("same"), Some(200)),
1554            make_probe_result("alias3", Some("same"), Some(150)),
1555        ];
1556
1557        let (deduped, merged) = deduplicate_probe_results(results);
1558
1559        assert_eq!(deduped.len(), 1);
1560        // alias2 has the most sessions
1561        assert_eq!(deduped[0].host_name, "alias2");
1562        // The merged list should contain the other two aliases
1563        let merged_list = merged.get("alias2").unwrap();
1564        assert_eq!(merged_list.len(), 2);
1565        assert!(merged_list.contains(&"alias1".to_string()));
1566        assert!(merged_list.contains(&"alias3".to_string()));
1567    }
1568
1569    #[test]
1570    #[cfg(not(windows))]
1571    fn real_probe_machine_id_present() {
1572        // Test that the local probe script actually collects machine_id
1573        let output = run_probe_script_locally();
1574        let result = parse_probe_output("localhost", &output, 0);
1575        let sys = result.system_info.as_ref().expect("system_info");
1576
1577        // On Linux or macOS, we should get a machine_id
1578        // (this test may be skipped on unusual systems)
1579        if sys.os == "linux" || sys.os == "darwin" {
1580            assert!(
1581                sys.machine_id.is_some(),
1582                "machine_id should be present on {}",
1583                sys.os
1584            );
1585            let mid = sys.machine_id.as_ref().unwrap();
1586            assert!(!mid.is_empty(), "machine_id should not be empty");
1587            // Machine IDs are typically 32+ hex chars or UUID format
1588            assert!(
1589                mid.len() >= 32,
1590                "machine_id should be at least 32 chars, got: {}",
1591                mid
1592            );
1593        }
1594    }
1595}