lha 1.0.2

Long-Horizon Agent command-line package that installs the lha binary.
Documentation
use crate::product::agent::config_loader::ResidencyRequirement;
use crate::product::agent::spawn::LHA_SANDBOX_ENV_VAR;
use lha_llm::client::CodexHttpClient;
pub use lha_llm::client::CodexRequestBuilder;
use reqwest::header::HeaderMap;
use reqwest::header::HeaderValue;
use std::sync::LazyLock;
use std::sync::Mutex;
use std::sync::RwLock;

/// Set this to add a suffix to the User-Agent string.
///
/// It is not ideal that we're using a global singleton for this.
/// This is primarily designed to differentiate MCP clients from each other.
/// Because there can only be one MCP server per process, it should be safe for this to be a global static.
/// However, future users of this should use this with caution as a result.
/// In addition, we want to be confident that this value is used for ALL clients and doing that requires a
/// lot of wiring and it's easy to miss code paths by doing so.
/// See https://github.com/openai/codex/pull/3388/files for an example of what that would look like.
/// Finally, we want to make sure this is set for ALL mcp clients without needing to know a special env var
/// or having to set data that they already specified in the mcp initialize request somewhere else.
///
/// A space is automatically added between the suffix and the rest of the User-Agent string.
/// The full user agent string is returned from the mcp initialize response.
/// Parenthesis will be added by LHA. This should only specify what goes inside of the parenthesis.
pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
pub const DEFAULT_ORIGINATOR: &str = "lha_cli_rs";
pub const LHA_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "LHA_INTERNAL_ORIGINATOR_OVERRIDE";
pub const RESIDENCY_HEADER_NAME: &str = "x-openai-internal-lha-residency";

#[derive(Debug, Clone)]
pub struct Originator {
    pub value: String,
    pub header_value: HeaderValue,
}
static ORIGINATOR: LazyLock<RwLock<Option<Originator>>> = LazyLock::new(|| RwLock::new(None));
static REQUIREMENTS_RESIDENCY: LazyLock<RwLock<Option<ResidencyRequirement>>> =
    LazyLock::new(|| RwLock::new(None));

#[derive(Debug)]
pub enum SetOriginatorError {
    InvalidHeaderValue,
    AlreadyInitialized,
}

fn get_originator_value(provided: Option<String>) -> Originator {
    let value = std::env::var(LHA_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR)
        .ok()
        .or(provided)
        .unwrap_or(DEFAULT_ORIGINATOR.to_string());

    match HeaderValue::from_str(&value) {
        Ok(header_value) => Originator {
            value,
            header_value,
        },
        Err(e) => {
            tracing::error!("Unable to turn originator override {value} into header value: {e}");
            Originator {
                value: DEFAULT_ORIGINATOR.to_string(),
                header_value: HeaderValue::from_static(DEFAULT_ORIGINATOR),
            }
        }
    }
}

pub fn set_default_originator(value: String) -> Result<(), SetOriginatorError> {
    if HeaderValue::from_str(&value).is_err() {
        return Err(SetOriginatorError::InvalidHeaderValue);
    }
    let originator = get_originator_value(Some(value));
    let Ok(mut guard) = ORIGINATOR.write() else {
        return Err(SetOriginatorError::AlreadyInitialized);
    };
    if guard.is_some() {
        return Err(SetOriginatorError::AlreadyInitialized);
    }
    *guard = Some(originator);
    Ok(())
}

pub fn set_default_client_residency_requirement(enforce_residency: Option<ResidencyRequirement>) {
    let Ok(mut guard) = REQUIREMENTS_RESIDENCY.write() else {
        tracing::warn!("Failed to acquire requirements residency lock");
        return;
    };
    *guard = enforce_residency;
}

pub fn originator() -> Originator {
    if let Ok(guard) = ORIGINATOR.read()
        && let Some(originator) = guard.as_ref()
    {
        return originator.clone();
    }

    if std::env::var(LHA_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR).is_ok() {
        let originator = get_originator_value(None);
        if let Ok(mut guard) = ORIGINATOR.write() {
            match guard.as_ref() {
                Some(originator) => return originator.clone(),
                None => *guard = Some(originator.clone()),
            }
        }
        return originator;
    }

    get_originator_value(None)
}

pub fn is_first_party_originator(originator_value: &str) -> bool {
    originator_value == DEFAULT_ORIGINATOR
        || originator_value == "lha_vscode"
        || originator_value.starts_with("LHA ")
}

pub fn get_lha_user_agent() -> String {
    let build_version = env!("CARGO_PKG_VERSION");
    let os_info = os_info::get();
    let originator = originator();
    let prefix = format!(
        "{}/{build_version} ({} {}; {}) {}",
        originator.value.as_str(),
        os_info.os_type(),
        os_info.version(),
        os_info.architecture().unwrap_or("unknown"),
        crate::product::agent::terminal::user_agent()
    );
    let suffix = USER_AGENT_SUFFIX
        .lock()
        .ok()
        .and_then(|guard| guard.clone());
    let suffix = suffix
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map_or_else(String::new, |value| format!(" ({value})"));

    let candidate = format!("{prefix}{suffix}");
    sanitize_user_agent(candidate, &prefix)
}

/// Sanitize the user agent string.
///
/// Invalid characters are replaced with an underscore.
///
/// If the user agent fails to parse, it falls back to fallback and then to ORIGINATOR.
fn sanitize_user_agent(candidate: String, fallback: &str) -> String {
    if HeaderValue::from_str(candidate.as_str()).is_ok() {
        return candidate;
    }

    let sanitized: String = candidate
        .chars()
        .map(|ch| if matches!(ch, ' '..='~') { ch } else { '_' })
        .collect();
    if !sanitized.is_empty() && HeaderValue::from_str(sanitized.as_str()).is_ok() {
        tracing::warn!(
            "Sanitized LHA user agent because provided suffix contained invalid header characters"
        );
        sanitized
    } else if HeaderValue::from_str(fallback).is_ok() {
        tracing::warn!(
            "Falling back to base LHA user agent because provided suffix could not be sanitized"
        );
        fallback.to_string()
    } else {
        tracing::warn!(
            "Falling back to default LHA originator because base user agent string is invalid"
        );
        originator().value
    }
}

/// Create an HTTP client with default `originator` and `User-Agent` headers set.
pub fn create_client() -> CodexHttpClient {
    let inner = build_reqwest_client();
    CodexHttpClient::new(inner)
}

#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ReqwestClientOptions {
    pub http1_only: bool,
}

pub fn build_reqwest_client() -> reqwest::Client {
    build_reqwest_client_with_options(ReqwestClientOptions::default())
}

pub fn build_reqwest_client_with_options(options: ReqwestClientOptions) -> reqwest::Client {
    let mut headers = HeaderMap::new();
    headers.insert("originator", originator().header_value);
    if let Ok(guard) = REQUIREMENTS_RESIDENCY.read()
        && let Some(requirement) = guard.as_ref()
        && !headers.contains_key(RESIDENCY_HEADER_NAME)
    {
        let value = match requirement {
            ResidencyRequirement::Us => HeaderValue::from_static("us"),
        };
        headers.insert(RESIDENCY_HEADER_NAME, value);
    }
    let ua = get_lha_user_agent();

    let mut builder = reqwest::Client::builder()
        // Set UA via dedicated helper to avoid header validation pitfalls
        .user_agent(ua)
        .default_headers(headers);
    if options.http1_only {
        builder = builder.http1_only();
    }
    if is_sandboxed() {
        builder = builder.no_proxy();
    }

    builder.build().unwrap_or_else(|_| reqwest::Client::new())
}

fn is_sandboxed() -> bool {
    std::env::var(LHA_SANDBOX_ENV_VAR).as_deref() == Ok("seatbelt")
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::core::skip_if_no_network;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_get_lha_user_agent() {
        let user_agent = get_lha_user_agent();
        let originator = originator().value;
        let prefix = format!("{originator}/");
        assert!(user_agent.starts_with(&prefix));
    }

    #[test]
    fn is_first_party_originator_matches_known_values() {
        assert_eq!(is_first_party_originator(DEFAULT_ORIGINATOR), true);
        assert_eq!(is_first_party_originator("lha_vscode"), true);
        assert_eq!(is_first_party_originator("LHA Something Else"), true);
        assert_eq!(is_first_party_originator("lha_cli"), false);
        assert_eq!(is_first_party_originator("Other"), false);
    }

    #[tokio::test]
    async fn test_create_client_sets_default_headers() {
        skip_if_no_network!();

        set_default_client_residency_requirement(Some(ResidencyRequirement::Us));

        use wiremock::Mock;
        use wiremock::MockServer;
        use wiremock::ResponseTemplate;
        use wiremock::matchers::method;
        use wiremock::matchers::path;

        let client = create_client();

        // Spin up a local mock server and capture a request.
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/"))
            .respond_with(ResponseTemplate::new(200))
            .mount(&server)
            .await;

        let resp = client
            .get(server.uri())
            .send()
            .await
            .expect("failed to send request");
        assert!(resp.status().is_success());

        let requests = server
            .received_requests()
            .await
            .expect("failed to fetch received requests");
        assert!(!requests.is_empty());
        let headers = &requests[0].headers;

        // originator header is set to the provided value
        let originator_header = headers
            .get("originator")
            .expect("originator header missing");
        assert_eq!(originator_header.to_str().unwrap(), originator().value);

        // User-Agent matches the computed LHA UA for that originator
        let expected_ua = get_lha_user_agent();
        let ua_header = headers
            .get("user-agent")
            .expect("user-agent header missing");
        assert_eq!(ua_header.to_str().unwrap(), expected_ua);

        let residency_header = headers
            .get(RESIDENCY_HEADER_NAME)
            .expect("residency header missing");
        assert_eq!(residency_header.to_str().unwrap(), "us");

        set_default_client_residency_requirement(None);
    }

    #[test]
    fn reqwest_client_options_default_does_not_force_http1() {
        assert_eq!(
            ReqwestClientOptions::default(),
            ReqwestClientOptions { http1_only: false }
        );
    }

    #[test]
    fn build_reqwest_client_accepts_http1_only_option() {
        let _client = build_reqwest_client_with_options(ReqwestClientOptions { http1_only: true });
    }

    #[test]
    fn test_invalid_suffix_is_sanitized() {
        let prefix = "lha_cli_rs/0.0.0";
        let suffix = "bad\rsuffix";

        assert_eq!(
            sanitize_user_agent(format!("{prefix} ({suffix})"), prefix),
            "lha_cli_rs/0.0.0 (bad_suffix)"
        );
    }

    #[test]
    fn test_invalid_suffix_is_sanitized2() {
        let prefix = "lha_cli_rs/0.0.0";
        let suffix = "bad\0suffix";

        assert_eq!(
            sanitize_user_agent(format!("{prefix} ({suffix})"), prefix),
            "lha_cli_rs/0.0.0 (bad_suffix)"
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_macos() {
        let user_agent = get_lha_user_agent();
        let originator = originator().value;
        let prefix = format!("{originator}/");
        assert!(user_agent.starts_with(&prefix));

        let platform_start = user_agent
            .find(" (")
            .expect("user agent has platform start");
        let platform_content_start = platform_start + " (".len();
        let platform_end = user_agent[platform_content_start..]
            .find(") ")
            .map(|offset| platform_content_start + offset)
            .expect("user agent has platform end");
        let platform = &user_agent[platform_content_start..platform_end];

        let platform = platform
            .strip_prefix("Mac OS ")
            .expect("macOS platform starts with OS name");
        let (os_version, architecture) = platform
            .split_once("; ")
            .expect("macOS platform includes architecture");
        assert!(!os_version.is_empty());
        assert!(
            os_version
                .split('.')
                .all(|part| { !part.is_empty() && part.chars().all(|ch| ch.is_ascii_digit()) })
        );
        assert!(matches!(
            architecture,
            "arm64" | "x86_64" | "aarch64" | "x86" | "i386" | "unknown"
        ));

        let terminal_suffix = &user_agent[platform_end + ") ".len()..];
        assert!(!terminal_suffix.trim().is_empty());
    }
}