soth-mitm 0.3.0

Rust intercepting proxy crate with deterministic handler/event contracts for SOTH.
Documentation
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
use std::path::PathBuf;

use crate::observe::FlowContext;
use crate::protocol::ApplicationProtocol;

use crate::runtime::connection_id::connection_id_for_flow_id;
use crate::types::{
    ConnectionInfo, ConnectionMeta, ProcessInfo, SocketFamily, TlsClientFingerprint, TlsInfo,
};

/// Assembles the [`ConnectionMeta`] reported for a freshly accepted connection.
///
/// TLS details are first inferred from the flow context. When a client fingerprint is
/// supplied, its JA4 hash and TLS version overwrite the corresponding fields; if the
/// flow context produced no TLS details at all, a record carrying only the fingerprint
/// data is created instead. The HTTP/2 connection and stream identifiers are always
/// left unset by this function.
pub(crate) fn connection_meta_from_accept_context(
    context: &FlowContext,
    process_info: Option<ProcessInfo>,
    fingerprint: Option<&TlsClientFingerprint>,
) -> ConnectionMeta {
    let mut tls_info = tls_info_from_flow_context(context);

    if let Some(client_fp) = fingerprint {
        // Start from an empty record when the context gave no TLS hints, then layer the
        // fingerprint fields on top in either case.
        let merged = tls_info.get_or_insert_with(|| TlsInfo {
            sni: None,
            negotiated_proto: None,
            ja4_hash: None,
            tls_version: None,
        });
        merged.ja4_hash = Some(client_fp.ja4.clone());
        merged.tls_version = Some(client_fp.tls_version);
    }

    ConnectionMeta {
        connection_id: connection_id_for_flow_id(context.flow_id),
        socket_family: socket_family_from_flow_context(context),
        process_info,
        tls_info,
        h2_connection_id: None,
        h2_stream_id: None,
    }
}

pub(crate) fn socket_family_from_flow_context(context: &FlowContext) -> SocketFamily {
    if let Some(meta) = parse_unix_client_addr_meta(&context.client_addr) {
        return SocketFamily::UnixDomain { path: meta.path };
    }
    let local = context.client_addr.parse::<SocketAddr>().ok();
    match local {
        Some(SocketAddr::V4(local_v4)) => SocketFamily::TcpV4 {
            local: local_v4,
            remote: SocketAddrV4::new(
                context
                    .server_host
                    .parse::<Ipv4Addr>()
                    .unwrap_or(Ipv4Addr::UNSPECIFIED),
                context.server_port,
            ),
        },
        Some(SocketAddr::V6(local_v6)) => SocketFamily::TcpV6 {
            local: local_v6,
            remote: SocketAddrV6::new(
                context
                    .server_host
                    .parse::<Ipv6Addr>()
                    .unwrap_or(Ipv6Addr::UNSPECIFIED),
                context.server_port,
                0,
                0,
            ),
        },
        None => SocketFamily::TcpV4 {
            local: SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0),
            remote: SocketAddrV4::new(
                context
                    .server_host
                    .parse::<Ipv4Addr>()
                    .unwrap_or(Ipv4Addr::UNSPECIFIED),
                context.server_port,
            ),
        },
    }
}

pub(crate) fn lookup_connection_info_from_flow_context(context: &FlowContext) -> ConnectionInfo {
    let socket_family = socket_family_from_flow_context(context);
    let (alpn_protocol, is_http2) = protocol_hints_from_flow_context(context);
    let (source_ip, source_port) = match &socket_family {
        SocketFamily::TcpV4 { local, .. } => (IpAddr::V4(*local.ip()), local.port()),
        SocketFamily::TcpV6 { local, .. } => (IpAddr::V6(*local.ip()), local.port()),
        SocketFamily::UnixDomain { .. } => (IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0),
    };
    ConnectionInfo {
        connection_id: connection_id_for_flow_id(context.flow_id),
        source_ip,
        source_port,
        destination_host: context.server_host.clone(),
        destination_port: context.server_port,
        socket_family,
        tls_fingerprint: None,
        alpn_protocol,
        is_http2,
        process_info: None,
        connected_at: std::time::SystemTime::now(),
        request_count: 0,
    }
}

/// Infers [`TlsInfo`] for a flow, but only when the flow looks like TLS.
///
/// A flow is treated as likely TLS when its protocol is HTTP/2 or its destination port
/// is one of 443, 8443, 4433 or 9443. For any other flow this returns `None` without
/// consulting the protocol hints.
pub(crate) fn tls_info_from_flow_context(context: &FlowContext) -> Option<TlsInfo> {
    const TLS_PORT_HINTS: [u16; 4] = [443, 8443, 4433, 9443];

    let speaks_http2 = context.protocol == ApplicationProtocol::Http2;
    let targets_tls_port = TLS_PORT_HINTS.contains(&context.server_port);

    if speaks_http2 || targets_tls_port {
        tls_info_from_protocol_hints(context)
    } else {
        None
    }
}

/// Produces [`TlsInfo`] for a flow once the TLS interception decision is known.
///
/// When TLS was intercepted the protocol hints are used directly, bypassing the
/// "likely TLS" port/protocol heuristic; otherwise the heuristic-gated
/// [`tls_info_from_flow_context`] decides.
pub(crate) fn tls_info_from_intercept_decision(
    context: &FlowContext,
    intercepted_tls: bool,
) -> Option<TlsInfo> {
    if intercepted_tls {
        tls_info_from_protocol_hints(context)
    } else {
        tls_info_from_flow_context(context)
    }
}

/// Builds [`TlsInfo`] from the flow's destination host and application protocol alone.
///
/// The SNI is the normalized `server_host`. The negotiated protocol comes from
/// [`protocol_hints_from_flow_context`], the same mapping that feeds
/// `ConnectionInfo::alpn_protocol`, so the two values cannot drift apart. The JA4 hash
/// and TLS version are left unset.
///
/// Returns `None` (after a trace-level log) when neither an SNI nor a negotiated
/// protocol is available.
fn tls_info_from_protocol_hints(context: &FlowContext) -> Option<TlsInfo> {
    let sni = normalize_sni(&context.server_host);
    // Single source of truth for the protocol -> ALPN mapping; the HTTP/2 flag is not
    // needed for TLS metadata.
    let (negotiated_proto, _is_http2) = protocol_hints_from_flow_context(context);

    if sni.is_none() && negotiated_proto.is_none() {
        tracing::trace!(
            host = %context.server_host,
            protocol = ?context.protocol,
            "no TLS metadata available for flow"
        );
        return None;
    }

    Some(TlsInfo {
        sni,
        negotiated_proto,
        ja4_hash: None,
        tls_version: None,
    })
}

/// Maps the flow's application protocol to `(ALPN identifier, is_http2)`.
///
/// HTTP/2 yields `("h2", true)`; HTTP/1, WebSocket, SSE and streamable HTTP yield
/// `("http/1.1", false)`; tunnels carry no ALPN hint and yield `(None, false)`.
fn protocol_hints_from_flow_context(context: &FlowContext) -> (Option<String>, bool) {
    let (alpn, is_http2) = match context.protocol {
        ApplicationProtocol::Http2 => (Some("h2"), true),
        ApplicationProtocol::Http1
        | ApplicationProtocol::WebSocket
        | ApplicationProtocol::Sse
        | ApplicationProtocol::StreamableHttp => (Some("http/1.1"), false),
        ApplicationProtocol::Tunnel => (None, false),
    };
    (alpn.map(str::to_string), is_http2)
}

/// Turns a destination host string into an SNI value.
///
/// Surrounding whitespace is stripped. An empty result or the literal placeholder
/// `<unknown>` means no SNI is available and yields `None`; any other value is
/// returned as an owned string.
fn normalize_sni(server_host: &str) -> Option<String> {
    match server_host.trim() {
        "" | "<unknown>" => None,
        host => Some(host.to_string()),
    }
}

/// Metadata extracted from a `unix:`-prefixed client address string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct UnixClientAddrMeta {
    /// Value of the `pid=` field, when present and a valid `u32`.
    pub(crate) pid: Option<u32>,
    /// Value of the `path=` field, when present and non-empty.
    pub(crate) path: Option<PathBuf>,
}

/// Parses a client address of the form `unix:key=value,key=value,...`.
///
/// Returns `None` when the string does not start with `unix:`. Otherwise the remainder
/// is split on commas and each piece is trimmed. `pid=<u32>` and `path=<non-empty>`
/// fields are recognised; unknown fields, empty pieces, unparseable pids and empty
/// paths are ignored. When a field repeats, the last valid occurrence wins.
pub(crate) fn parse_unix_client_addr_meta(client_addr: &str) -> Option<UnixClientAddrMeta> {
    let fields = client_addr.strip_prefix("unix:")?;

    let mut meta = UnixClientAddrMeta {
        pid: None,
        path: None,
    };

    for field in fields.split(',') {
        let field = field.trim();
        if let Some(value) = field.strip_prefix("pid=") {
            // A malformed pid leaves any previously parsed pid untouched.
            if let Ok(pid) = value.trim().parse::<u32>() {
                meta.pid = Some(pid);
            }
        } else if let Some(value) = field.strip_prefix("path=") {
            let value = value.trim();
            if !value.is_empty() {
                meta.path = Some(PathBuf::from(value));
            }
        }
    }

    Some(meta)
}

/// Builds a minimal [`ProcessInfo`] from a `unix:` client address.
///
/// Returns `None` when the address is not a `unix:` address or carries no valid
/// `pid=` field. Only the pid is populated; every other field is left unset.
pub(crate) fn process_info_from_unix_client_addr(client_addr: &str) -> Option<ProcessInfo> {
    parse_unix_client_addr_meta(client_addr)
        .and_then(|meta| meta.pid)
        .map(|pid| ProcessInfo {
            pid,
            bundle_id: None,
            exe_name: None,
            exe_path: None,
            parent_pid: None,
            parent_process_name: None,
        })
}

#[cfg(test)]
mod tests {
    use super::{lookup_connection_info_from_flow_context, tls_info_from_flow_context};
    use crate::observe::FlowContext;
    use crate::protocol::ApplicationProtocol;
    use crate::types::FlowId;

    /// Builds an HTTP/2 flow to `api.example.com:443` for the given id and client address.
    fn http2_flow(flow_id: FlowId, client_addr: &str) -> FlowContext {
        FlowContext {
            flow_id,
            client_addr: client_addr.to_string(),
            server_host: "api.example.com".to_string(),
            server_port: 443,
            protocol: ApplicationProtocol::Http2,
        }
    }

    /// An HTTP/2 flow must yield TLS info carrying the host as SNI and `h2` as protocol.
    #[test]
    fn tls_info_is_populated_for_http2_context() {
        let context = http2_flow(FlowId(7), "127.0.0.1:5000");

        let tls_info = tls_info_from_flow_context(&context).expect("tls info");

        assert_eq!(tls_info.sni.as_deref(), Some("api.example.com"));
        assert_eq!(tls_info.negotiated_proto.as_deref(), Some("h2"));
    }

    /// The connection info's ALPN and HTTP/2 flag must mirror the flow protocol.
    #[test]
    fn connection_info_protocol_hints_follow_flow_protocol() {
        let context = http2_flow(FlowId(8), "127.0.0.1:5001");

        let info = lookup_connection_info_from_flow_context(&context);

        assert!(info.is_http2);
        assert_eq!(info.alpn_protocol.as_deref(), Some("h2"));
    }
}