// openlatch-provider 0.2.1
//
// Self-service onboarding CLI + runtime daemon for OpenLatch Editors and Providers
//! Client-side endpoint validation — `bindings probe` and the pre-flight
//! check inside `register`. Catches obvious misconfigurations before the
//! manifest hits the platform's authoritative probe.
//!
//! Coverage in v0.1 (per `phase-1-editor-cli.md` task P1.T5):
//!   1. URL parsing
//!   2. Scheme must be `https`
//!   3. Port must be 443
//!   4. Host must resolve to a public IP only
//!   5. Reject cloud-metadata IPs (169.254.169.254, fd00:ec2::254, etc.)
//!   6. Reject IPv4-mapped IPv6
//!
//! Deferred to P3 (latency budget too tight for the v0.1 deadline):
//!   - TLS handshake validation (require ≥ TLS 1.2)
//!   - HTTP redirect-policy check
//!   - Synthetic event probe (POST a canned event, expect 2xx)
//!   - 10-call serial latency probe (measured p95 ≤ declared * 3)
//!
//! The platform's authoritative probe at `register` time covers the gaps —
//! this module is best-effort early-feedback.

use std::net::{IpAddr, Ipv4Addr, ToSocketAddrs};

use serde::Serialize;

use crate::error::{
    OlError, OL_4212_INVALID_ENDPOINT_URL, OL_4240_ENDPOINT_NOT_HTTPS, OL_4241_PRIVATE_IP,
    OL_4244_SYNTHETIC_PROBE_FAILED, OL_4246_CLOUD_METADATA_IP, OL_4247_IPV4_MAPPED_V6,
};

/// Findings collected from a single probe pass.
#[derive(Debug, Default, Clone, Serialize)]
pub struct ProbeReport {
    /// The endpoint URL exactly as it was handed to the probe.
    pub endpoint_url: String,
    /// Host component extracted from the URL. Stays `None` until the URL has
    /// passed the parse, scheme, and port checks.
    pub host: Option<String>,
    /// IP addresses the host resolved to, rendered as strings.
    pub resolved_ips: Vec<String>,
    /// Set to `true` only after every check has succeeded.
    pub passed: bool,
    /// Individual check results, in the order they were recorded.
    pub findings: Vec<ProbeFinding>,
}

/// A single result recorded by one of the probe checks.
#[derive(Debug, Clone, Serialize)]
pub struct ProbeFinding {
    /// How serious the finding is.
    pub severity: ProbeSeverity,
    /// Short identifier for the finding — an `OL-42xx` code for a failed
    /// check, or `probe.ok` for the success entry.
    pub code: String,
    /// Human-readable description of the finding.
    pub message: String,
}

/// Severity attached to a [`ProbeFinding`]. Serialized in lowercase
/// (`error`, `warning`, `info`).
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ProbeSeverity {
    /// A check failed; the probe stops at the first one.
    Error,
    /// Non-fatal concern. NOTE(review): nothing in this module emits it yet —
    /// presumably reserved for the deferred checks; confirm.
    Warning,
    /// Informational entry, e.g. the `probe.ok` success finding.
    Info,
}

/// Probe options. Defaults reflect production posture (public-IP only,
/// cloud-metadata blocked, no IPv4-mapped). Local-dev callers may relax with
/// [`ProbeOpts::allow_local`].
#[derive(Debug, Default, Clone, Copy)]
pub struct ProbeOpts {
    /// Permit endpoints that resolve to non-public IPs (loopback / private /
    /// cloud-metadata / IPv4-mapped). Intended for `--allow-local-endpoints`
    /// flows in `register` / `bindings probe` against a dev tool server.
    ///
    /// `false` under [`Default`]. The URL, scheme, port, and DNS-resolution
    /// checks still run when this is set.
    pub allow_local: bool,
}

/// Run the probe synchronously with the default (production) [`ProbeOpts`].
///
/// On success the returned [`ProbeReport`] has `passed` set and lists the
/// resolved addresses. On failure only the first error encountered is
/// returned, as the `Result::Err`, so command sites can surface the right
/// OL-42xx exit code — the partially filled report is not handed back to the
/// caller on that path.
pub fn probe(endpoint_url: &str) -> Result<ProbeReport, OlError> {
    probe_with_opts(endpoint_url, ProbeOpts::default())
}

pub fn probe_with_opts(endpoint_url: &str, opts: ProbeOpts) -> Result<ProbeReport, OlError> {
    let mut report = ProbeReport {
        endpoint_url: endpoint_url.to_string(),
        ..Default::default()
    };

    // --- URL parsing -----------------------------------------------------
    let parsed = match url_parse(endpoint_url) {
        Some(p) => p,
        None => {
            let msg = format!("could not parse endpoint_url `{endpoint_url}`");
            report.findings.push(ProbeFinding {
                severity: ProbeSeverity::Error,
                code: "OL-4212".into(),
                message: msg.clone(),
            });
            return Err(OlError::new(OL_4212_INVALID_ENDPOINT_URL, msg));
        }
    };

    // --- HTTPS only ------------------------------------------------------
    if parsed.scheme != "https" {
        let msg = format!("endpoint scheme must be `https`, got `{}`", parsed.scheme);
        report.findings.push(ProbeFinding {
            severity: ProbeSeverity::Error,
            code: "OL-4240".into(),
            message: msg.clone(),
        });
        return Err(OlError::new(OL_4240_ENDPOINT_NOT_HTTPS, msg));
    }

    // --- Port 443 only ---------------------------------------------------
    let effective_port = parsed.port.unwrap_or(443);
    if effective_port != 443 {
        let msg = format!("endpoint port must be 443, got {effective_port}");
        report.findings.push(ProbeFinding {
            severity: ProbeSeverity::Error,
            code: "OL-4240".into(),
            message: msg.clone(),
        });
        return Err(OlError::new(OL_4240_ENDPOINT_NOT_HTTPS, msg));
    }

    // --- DNS resolution + IP allow-list ----------------------------------
    report.host = Some(parsed.host.clone());
    let addrs = match (parsed.host.as_str(), effective_port).to_socket_addrs() {
        Ok(a) => a,
        Err(e) => {
            let msg = format!("DNS resolution failed for `{}`: {e}", parsed.host);
            report.findings.push(ProbeFinding {
                severity: ProbeSeverity::Error,
                code: "OL-4244".into(),
                message: msg.clone(),
            });
            return Err(OlError::new(OL_4244_SYNTHETIC_PROBE_FAILED, msg));
        }
    };

    for addr in addrs {
        let ip = addr.ip();
        report.resolved_ips.push(ip.to_string());

        if opts.allow_local {
            // Skip public-IP / cloud-metadata / IPv4-mapped checks. Scheme,
            // port, and DNS-resolution checks above still apply.
            continue;
        }

        if is_cloud_metadata(ip) {
            let msg = format!("endpoint resolves to cloud-metadata IP `{ip}` (e.g. AWS IMDS)");
            report.findings.push(ProbeFinding {
                severity: ProbeSeverity::Error,
                code: "OL-4246".into(),
                message: msg.clone(),
            });
            return Err(OlError::new(OL_4246_CLOUD_METADATA_IP, msg));
        }
        if is_ipv4_mapped_v6(ip) {
            let msg = format!("endpoint resolves to an IPv4-mapped IPv6 address `{ip}`");
            report.findings.push(ProbeFinding {
                severity: ProbeSeverity::Error,
                code: "OL-4247".into(),
                message: msg.clone(),
            });
            return Err(OlError::new(OL_4247_IPV4_MAPPED_V6, msg));
        }
        if !is_public_ip(ip) {
            let msg = format!("endpoint resolves to non-public IP `{ip}`");
            report.findings.push(ProbeFinding {
                severity: ProbeSeverity::Error,
                code: "OL-4241".into(),
                message: msg.clone(),
            });
            return Err(OlError::new(OL_4241_PRIVATE_IP, msg).with_suggestion(
                "endpoints must be reachable from the public internet — point at \
                 a public hostname behind your TLS-terminating proxy.",
            ));
        }
    }

    report.passed = true;
    report.findings.push(ProbeFinding {
        severity: ProbeSeverity::Info,
        code: "probe.ok".into(),
        message: "URL, scheme, port, and DNS resolution checks passed".into(),
    });

    Ok(report)
}

/// The pieces of an endpoint URL that the probe inspects.
#[derive(Debug, Clone)]
struct ParsedUrl {
    /// URL scheme, lower-cased.
    scheme: String,
    /// Host name or IP literal, without IPv6 brackets or port.
    host: String,
    /// Explicit port, or `None` when the URL does not name one.
    port: Option<u16>,
}

/// Split `url` into scheme, host, and optional port.
///
/// This is a deliberately strict, minimal parser. It returns `None` — so the
/// caller reports an invalid URL — rather than guess when the authority is
/// ambiguous:
///   - missing `://`, empty scheme, or empty host
///   - userinfo (`user:pass@host`) or a backslash in the authority, because
///     HTTP clients disagree on which side of those is the real host
///   - a port that is not a decimal number in `0..=65535` (previously such a
///     port was silently dropped and the URL treated as port 443)
///   - anything other than `:port` after the closing `]` of an IPv6 literal
///   - an unbracketed host that itself contains `:`
///
/// An empty port (`host:`) is accepted and means "no explicit port".
fn url_parse(url: &str) -> Option<ParsedUrl> {
    let (scheme, rest) = url.split_once("://")?;
    if scheme.is_empty() {
        return None;
    }

    // The authority ends at the first path, query, or fragment delimiter.
    let authority = rest.split(['/', '?', '#']).next().unwrap_or("");
    if authority.is_empty() || authority.contains('@') || authority.contains('\\') {
        return None;
    }

    let (host, port_text) = match authority.strip_prefix('[') {
        // IPv6 literal — `[::1]:443` form.
        Some(inner) => {
            let (host, after) = inner.split_once(']')?;
            let port_text = match after {
                "" => None,
                _ => Some(after.strip_prefix(':')?),
            };
            (host, port_text)
        }
        None => match authority.rsplit_once(':') {
            // More than one `:` outside brackets is not a valid host:port.
            Some((host, _)) if host.contains(':') => return None,
            Some((host, port_text)) => (host, Some(port_text)),
            None => (authority, None),
        },
    };
    if host.is_empty() {
        return None;
    }

    let port = match port_text {
        None | Some("") => None,
        // Digits only: `str::parse::<u16>` would otherwise accept a leading `+`.
        Some(text) if text.bytes().all(|b| b.is_ascii_digit()) => Some(text.parse::<u16>().ok()?),
        Some(_) => return None,
    };

    Some(ParsedUrl {
        scheme: scheme.to_ascii_lowercase(),
        host: host.to_string(),
        port,
    })
}

/// Return `true` when `ip` is plausibly reachable from the public internet.
///
/// Rejected for both families: loopback, unspecified, and multicast.
///
/// Rejected IPv4 ranges: private (RFC 1918), link-local, broadcast,
/// documentation, `0.0.0.0/8` ("this network"), `100.64.0.0/10` (carrier-grade
/// NAT shared space), `198.18.0.0/15` (benchmarking), and `240.0.0.0/4`
/// (reserved).
///
/// Rejected IPv6 ranges: unique-local `fc00::/7`, link-local `fe80::/10`, and
/// documentation `2001:db8::/32`.
///
/// IPv4-mapped IPv6 addresses are not unwrapped here; they are handled by a
/// separate check.
fn is_public_ip(ip: IpAddr) -> bool {
    if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
        return false;
    }
    match ip {
        IpAddr::V4(v4) => {
            let [a, b, _, _] = v4.octets();
            let this_network = a == 0;
            let shared_cgnat = a == 100 && (b & 0xc0) == 64;
            let benchmarking = a == 198 && (b & 0xfe) == 18;
            let reserved = a >= 240;
            !(v4.is_private()
                || v4.is_link_local()
                || v4.is_broadcast()
                || v4.is_documentation()
                || this_network
                || shared_cgnat
                || benchmarking
                || reserved)
        }
        IpAddr::V6(v6) => {
            let segments = v6.segments();
            let unique_local = (segments[0] & 0xfe00) == 0xfc00;
            let link_local = (segments[0] & 0xffc0) == 0xfe80;
            let documentation = segments[0] == 0x2001 && segments[1] == 0x0db8;
            !(unique_local || link_local || documentation)
        }
    }
}

/// Return `true` when `ip` is a well-known cloud instance-metadata address.
///
/// Recognised endpoints:
///   - `169.254.169.254` — AWS, GCP, Azure, and several other providers
///   - `100.100.100.200` — Alibaba Cloud
///   - `fd00:ec2::254` — AWS IMDS over IPv6
///   - `fd20:ce::254` — GCP metadata server over IPv6
fn is_cloud_metadata(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V4(v4) => matches!(
            v4.octets(),
            [169, 254, 169, 254] | [100, 100, 100, 200]
        ),
        IpAddr::V6(v6) => matches!(
            v6.segments(),
            // `fd00:ec2::254` — the second group is the single value 0x0ec2,
            // not the two groups 0x00ec and 0x0002.
            [0xfd00, 0x0ec2, 0, 0, 0, 0, 0, 0x0254] | [0xfd20, 0x00ce, 0, 0, 0, 0, 0, 0x0254]
        ),
    }
}

/// Return `true` when `ip` is an IPv6 address of the IPv4-mapped form
/// (`::ffff:a.b.c.d`). Plain IPv4 addresses are never considered mapped.
fn is_ipv4_mapped_v6(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V6(addr) => addr.to_ipv4_mapped().is_some(),
        IpAddr::V4(_) => false,
    }
}

// Unit tests for the probe. The tests that call `probe` / `probe_with_opts`
// with a host name or IP literal go through the system resolver.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn https_443_with_public_host_passes() {
        // example.com almost always resolves to a public address; if DNS is
        // offline the test bails informatively rather than failing CI.
        match probe("https://example.com/v1/event") {
            Ok(report) => {
                assert!(report.passed);
                assert!(!report.resolved_ips.is_empty());
            }
            // OL-4244 is the code the probe uses for a DNS-resolution failure.
            Err(e) if e.code.code == "OL-4244" => {
                eprintln!("DNS unavailable in test env — skipping");
            }
            Err(e) => panic!("unexpected: {e}"),
        }
    }

    #[test]
    fn http_scheme_rejected() {
        // The scheme check runs before any DNS lookup, so this needs no network.
        let err = probe("http://example.com/v1/event").unwrap_err();
        assert_eq!(err.code.code, "OL-4240");
    }

    #[test]
    fn non_443_port_rejected() {
        // Port violations share the OL-4240 code with scheme violations.
        let err = probe("https://example.com:8080/v1/event").unwrap_err();
        assert_eq!(err.code.code, "OL-4240");
    }

    #[test]
    fn malformed_url_rejected() {
        // No `://` separator, so URL parsing fails outright.
        let err = probe("not a url").unwrap_err();
        assert_eq!(err.code.code, "OL-4212");
    }

    #[test]
    fn loopback_ip_rejected() {
        // 127.0.0.1 → the URL parser keeps the literal as host; resolution
        // gives back the loopback and the public-IP check fires.
        let err = probe("https://127.0.0.1/v1/event").unwrap_err();
        // The scheme is https and the port defaults to 443, so the
        // private-IP path (OL-4241) is the expected outcome; OL-4240 is
        // tolerated by the assertion as well.
        assert!(
            err.code.code == "OL-4241" || err.code.code == "OL-4240",
            "expected OL-4241/OL-4240, got {}",
            err.code.code
        );
    }

    #[test]
    fn cloud_metadata_v4_recognised() {
        // The link-local metadata address is flagged; a public resolver is not.
        assert!(is_cloud_metadata(IpAddr::V4(Ipv4Addr::new(
            169, 254, 169, 254
        ))));
        assert!(!is_cloud_metadata(IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8))));
    }

    #[test]
    fn ipv4_mapped_v6_recognised() {
        // `::ffff:a.b.c.d` is the IPv4-mapped IPv6 form.
        let mapped: IpAddr = "::ffff:192.0.2.128".parse().unwrap();
        assert!(is_ipv4_mapped_v6(mapped));
    }

    #[test]
    fn private_ip_recognised() {
        // Two RFC 1918 addresses are non-public; 8.8.8.8 is public.
        assert!(!is_public_ip(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))));
        assert!(!is_public_ip(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))));
        assert!(is_public_ip(IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8))));
    }

    #[test]
    fn allow_local_bypasses_private_ip_check() {
        // With `allow_local` the IP allow-list is skipped, so loopback passes.
        let report = probe_with_opts(
            "https://127.0.0.1/v1/event",
            ProbeOpts { allow_local: true },
        )
        .expect("loopback should pass under --allow-local-endpoints");
        assert!(report.passed);
        assert!(!report.resolved_ips.is_empty());
    }

    #[test]
    fn allow_local_still_rejects_non_https() {
        // `allow_local` relaxes only the IP checks; the scheme check still applies.
        let err = probe_with_opts("http://127.0.0.1/v1/event", ProbeOpts { allow_local: true })
            .unwrap_err();
        assert_eq!(err.code.code, "OL-4240");
    }
}