# gephyr 1.16.15
#
# Gephyr is a headless local AI relay/proxy API handling OpenAI, Claude, and Gemini-compatible APIs.
# Documentation
param(
    # Path to the trace captured from Gephyr. Loaded through Load-TraceRecords:
    # ".har" and ".saz" get dedicated parsers, anything else is read as JSONL.
    [string]$GephyrPath = "output/gephyr_google_outbound_headers.jsonl",
    # Path to the known-good (baseline) capture; same format rules as -GephyrPath.
    [Parameter(Mandatory = $true)]
    [string]$KnownGoodPath,
    # Destination of the JSON report.
    [string]$OutJson = "output/google_trace_diff_report.json",
    # Destination of the plain-text report.
    [string]$OutText = "output/google_trace_diff_report.txt",
    # Header names left out of the missing/extra comparison (trimmed and lower-cased before use).
    [string[]]$IgnoreHeaders = @("content-length"),
    # Also leave the "connection" header out of the comparison.
    [switch]$IgnoreConnectionHeader
    ,
    # Also leave the device-identifier headers (x-machine-id, x-mac-machine-id,
    # x-dev-device-id, x-sqm-id) out of the comparison.
    [switch]$IgnoreDeviceHeaders
)

# Turn non-terminating errors into terminating ones so a failed read or write aborts the run.
$ErrorActionPreference = "Stop"

# Reads a JSON-Lines file and returns one parsed object per non-blank line.
#   -Path : file to read; throws "File not found: <path>" when it does not exist.
# Blank and whitespace-only lines are skipped. A line that is not valid JSON
# surfaces the ConvertFrom-Json error to the caller.
function Get-JsonlRecords {
    param([string]$Path)

    $exists = Test-Path $Path
    if (-not $exists) {
        throw "File not found: $Path"
    }

    $allLines = @(Get-Content -Path $Path)
    $parsed = @()
    for ($idx = 0; $idx -lt $allLines.Count; $idx++) {
        $text = $allLines[$idx].Trim()
        if ($text) {
            # "+=" is kept on purpose: it flattens a line holding a JSON array
            # the same way the accumulating array always has.
            $parsed += ($text | ConvertFrom-Json)
        }
    }
    return $parsed
}

# Returns $true when -Endpoint is an http(s) URL whose host is google.com,
# googleapis.com, or a subdomain of either; $false otherwise (including for
# empty input).
#
# The host suffix is anchored on a label boundary, so look-alike hosts such as
# "notgoogle.com" and URLs that merely mention "google.com/" in their query
# string are rejected. Optional userinfo ("user@") and an explicit port are
# tolerated, and the host may be followed by "/", "?", "#" or end of string.
function Is-GoogleEndpoint {
    param([string]$Endpoint)
    if (-not $Endpoint) { return $false }

    $pattern = '(?i)^https?://(?:[^/?#@]*@)?(?:[a-z0-9_-]+\.)*(?:googleapis|google)\.com(?::\d+)?(?:[/?#]|$)'
    return $Endpoint -match $pattern
}

# Canonicalises an endpoint URL so the two traces can be keyed consistently.
#   - scheme and host are lower-cased
#   - "daily-cloudcode-pa.googleapis.com" is folded into "cloudcode-pa.googleapis.com"
#   - the port is kept only when it is not the scheme's default
#   - path and query are passed through unchanged
# Empty input, values that cannot be parsed as a URI, and relative URIs are
# returned unchanged.
function Normalize-Endpoint {
    param([string]$Endpoint)
    if (-not $Endpoint) { return $Endpoint }

    $uri = $null
    try {
        $uri = [System.Uri]$Endpoint
    } catch {
        return $Endpoint
    }

    # The cast can succeed for a relative reference, but Host/Scheme/Port are
    # only usable on absolute URIs, so bail out before touching them.
    if ($null -eq $uri -or -not $uri.IsAbsoluteUri) {
        return $Endpoint
    }

    $normalizedHost = $uri.Host.ToLowerInvariant()
    if ($normalizedHost -eq "daily-cloudcode-pa.googleapis.com") {
        $normalizedHost = "cloudcode-pa.googleapis.com"
    }

    $portPart = ""
    if (-not $uri.IsDefaultPort) {
        $portPart = ":$($uri.Port)"
    }
    return "{0}://{1}{2}{3}" -f $uri.Scheme.ToLowerInvariant(), $normalizedHost, $portPart, $uri.PathAndQuery
}

# Returns $true for endpoints that are excluded from the comparison: currently
# only the oauth2.googleapis.com tokeninfo URL, with or without a query string.
# Empty input yields $false.
function Is-NoiseEndpoint {
    param([string]$Endpoint)

    if ($Endpoint) {
        $isTokenInfo = $Endpoint -match '(?i)^https?://oauth2\.googleapis\.com/tokeninfo(?:\?|$)'
        return $isTokenInfo
    }
    return $false
}

# Loads a HAR capture and returns one record per request that targets a Google
# endpoint (as decided by Is-GoogleEndpoint).
#   -Path : HAR file; throws "File not found: <path>" when it does not exist.
# Each record has: timestamp (entry startedDateTime), endpoint (request URL),
# mode ("known_good") and headers (hashtable of lower-cased name -> value; when
# a header repeats, the last value wins).
function Get-HarRecords {
    param([string]$Path)

    $exists = Test-Path $Path
    if (-not $exists) {
        throw "File not found: $Path"
    }

    $archive = Get-Content -Path $Path -Raw | ConvertFrom-Json
    $kept = New-Object System.Collections.Generic.List[object]

    foreach ($item in $archive.log.entries) {
        $url = $item.request.url
        if (-not $url) { continue }

        $headerMap = @{}
        foreach ($pair in $item.request.headers) {
            if ($pair.name) {
                $key = ([string]$pair.name).ToLowerInvariant()
                $headerMap[$key] = [string]$pair.value
            }
        }

        $urlText = [string]$url
        if (-not (Is-GoogleEndpoint -Endpoint $urlText)) { continue }

        $kept.Add([pscustomobject]@{
            timestamp = $item.startedDateTime
            endpoint  = $urlText
            mode      = "known_good"
            headers   = $headerMap
        })
    }

    return $kept.ToArray()
}

# Loads a Fiddler SAZ archive (a zip) and returns one record per captured
# request that targets a Google endpoint (as decided by Is-GoogleEndpoint).
#   -Path : SAZ file; throws "File not found: <path>" when it does not exist.
# Only "raw/*_c.txt" entries (the client request text) are read. CONNECT
# tunnels and entries whose first line is not an HTTP request line are skipped.
# Each record has: timestamp ($null), endpoint, mode ("known_good") and
# headers (hashtable of lower-cased name -> value; last value wins).
function Get-SazRecords {
    param([string]$Path)
    if (-not (Test-Path $Path)) {
        throw "File not found: $Path"
    }

    Add-Type -AssemblyName System.IO.Compression.FileSystem
    # ProviderPath is the plain filesystem path that the .NET API expects.
    $zip = [System.IO.Compression.ZipFile]::OpenRead((Resolve-Path $Path).ProviderPath)
    try {
        $records = @()
        $requestEntries = $zip.Entries | Where-Object { $_.FullName -like "raw/*_c.txt" }
        foreach ($entry in $requestEntries) {
            $stream = $entry.Open()
            try {
                $reader = [System.IO.StreamReader]::new($stream)
                try {
                    $text = $reader.ReadToEnd()
                } finally {
                    $reader.Dispose()
                }
            } finally {
                $stream.Dispose()
            }

            $lines = $text -split "`r?`n"
            if ($lines.Count -eq 0) { continue }
            $first = $lines[0].Trim()
            if (-not $first) { continue }

            $m = [regex]::Match($first, '^(?<method>[A-Z]+)\s+(?<target>\S+)\s+HTTP/\d+\.\d+$')
            if (-not $m.Success) { continue }
            $method = $m.Groups["method"].Value
            $target = $m.Groups["target"].Value

            # CONNECT lines only describe the tunnel, not a request.
            if ($method -eq "CONNECT") {
                continue
            }

            # Header section ends at the first blank line.
            $headers = @{}
            for ($i = 1; $i -lt $lines.Count; $i++) {
                $line = $lines[$i]
                if ([string]::IsNullOrWhiteSpace($line)) {
                    break
                }
                $hm = [regex]::Match($line, '^(?<name>[^:]+):\s*(?<value>.*)$')
                if ($hm.Success) {
                    $name = $hm.Groups["name"].Value.Trim().ToLowerInvariant()
                    $value = $hm.Groups["value"].Value.Trim()
                    $headers[$name] = $value
                }
            }

            # Origin-form targets ("/path") are rebuilt into absolute URLs from
            # the Host header. The local must not be called $host: that is the
            # read-only automatic variable and assigning to it is an error.
            $endpoint = $target
            if (-not ($endpoint -match '^https?://')) {
                $hostHeader = $headers["host"]
                if ($hostHeader -and $endpoint.StartsWith("/")) {
                    # NOTE(review): a Host value without an explicit ":443" is treated as
                    # plain http - confirm this matches how the captures are produced.
                    $scheme = if ($hostHeader -match ':\s*443$') { "https" } else { "http" }
                    $endpoint = "${scheme}://$hostHeader$endpoint"
                }
            }

            if (-not (Is-GoogleEndpoint -Endpoint $endpoint)) {
                continue
            }

            $records += [pscustomobject]@{
                timestamp = $null
                endpoint  = $endpoint
                mode      = "known_good"
                headers   = $headers
            }
        }

        return $records
    } finally {
        $zip.Dispose()
    }
}

# Picks the trace parser from the file extension (case-insensitive):
# ".har" -> Get-HarRecords, ".saz" -> Get-SazRecords, anything else ->
# Get-JsonlRecords. Returns whatever the chosen parser returns.
function Load-TraceRecords {
    param([string]$Path)

    $extension = [IO.Path]::GetExtension($Path).ToLowerInvariant()
    switch ($extension) {
        ".har" { return Get-HarRecords -Path $Path }
        ".saz" { return Get-SazRecords -Path $Path }
    }
    return Get-JsonlRecords -Path $Path
}

# Builds the set of header names that are excluded from the missing/extra diff.
#   -IgnoreHeaders     : caller-supplied names; trimmed and lower-cased, blanks dropped.
#   -IncludeConnection : also add "connection".
#   -IncludeDevice     : also add x-machine-id, x-mac-machine-id, x-dev-device-id, x-sqm-id.
# Returns a single HashSet[string] object, including when it is empty.
function New-NormalizedHeaderIgnoreSet {
    param(
        [string[]]$IgnoreHeaders,
        [bool]$IncludeConnection,
        [bool]$IncludeDevice
    )
    $set = New-Object System.Collections.Generic.HashSet[string]
    foreach ($h in $IgnoreHeaders) {
        if (-not $h) { continue }
        $normalized = $h.Trim().ToLowerInvariant()
        if ($normalized) {
            [void]$set.Add($normalized)
        }
    }
    if ($IncludeConnection) {
        [void]$set.Add("connection")
    }
    if ($IncludeDevice) {
        foreach ($h in @("x-machine-id","x-mac-machine-id","x-dev-device-id","x-sqm-id")) {
            [void]$set.Add($h)
        }
    }
    # The unary comma stops PowerShell from unrolling the set on output. Without
    # it a one-element set reaches the caller as a bare string (so .Contains()
    # becomes a substring test) and an empty set reaches the caller as $null.
    return ,$set
}

# Aggregates trace records per normalised endpoint.
#   -Records : objects exposing .endpoint and .headers (hashtable/dictionary or
#              an object whose property names are the header names).
# Records whose endpoint normalises to an empty value, or that Is-NoiseEndpoint
# flags, are ignored. Returns a hashtable keyed by normalised endpoint; each
# value has .count (number of requests) and .headers (HashSet of lower-cased
# header names seen on that endpoint).
function Get-EndpointStats {
    param([object[]]$Records)

    $stats = @{}
    foreach ($rec in $Records) {
        $key = Normalize-Endpoint -Endpoint ([string]$rec.endpoint)
        if (-not $key) { continue }
        if (Is-NoiseEndpoint -Endpoint $key) { continue }

        $bucket = $stats[$key]
        if ($null -eq $bucket) {
            $bucket = [pscustomobject]@{
                count = 0
                headers = (New-Object System.Collections.Generic.HashSet[string])
            }
            $stats[$key] = $bucket
        }
        $bucket.count += 1

        # Dictionaries expose names through Keys; parsed JSON objects expose
        # them as property names.
        $source = $rec.headers
        $names = if ($source -is [System.Collections.IDictionary]) {
            $source.Keys
        } else {
            $source.PSObject.Properties.Name
        }
        foreach ($headerName in $names) {
            [void]$bucket.headers.Add(([string]$headerName).ToLowerInvariant())
        }
    }
    return $stats
}

# Filters -HeaderNames down to the names on the script's blocked list: anything
# starting with "sec-" or "x-forwarded-", plus a fixed list of exact names.
# Matching is case-insensitive. Returns the matches sorted with duplicates
# removed (nothing when there is no match).
function Get-BlockedHeaders {
    param([string[]]$HeaderNames)

    $exactNames = @(
        "origin", "referer", "cookie", "x-real-ip", "connection",
        "transfer-encoding", "upgrade", "keep-alive",
        "proxy-authenticate", "proxy-authorization", "te", "trailers"
    )
    $wildcards = @("sec-*", "x-forwarded-*")

    $hits = @()
    foreach ($candidate in $HeaderNames) {
        $isBlocked = $exactNames -contains $candidate
        if (-not $isBlocked) {
            foreach ($pattern in $wildcards) {
                if ($candidate -like $pattern) {
                    $isBlocked = $true
                    break
                }
            }
        }
        if ($isBlocked) {
            $hits += $candidate
        }
    }
    return ($hits | Sort-Object -Unique)
}

# Load both traces. The results are wrapped in @() so they are always arrays:
# a trace with exactly one record otherwise arrives as a bare object, and
# .Count is not reliable on a bare PSCustomObject in Windows PowerShell 5.1.
$gephyr = @(Load-TraceRecords -Path $GephyrPath)
$known = @(Load-TraceRecords -Path $KnownGoodPath)
$knownIsEmpty = ($known.Count -eq 0)
if ($knownIsEmpty) {
    Write-Warning "No Google HTTP requests were parsed from '$KnownGoodPath'. If this is a Fiddler SAZ, enable HTTPS decryption so requests are captured beyond CONNECT tunnels."
}

# Header names excluded from the missing/extra comparison.
$ignoreSet = New-NormalizedHeaderIgnoreSet -IgnoreHeaders $IgnoreHeaders -IncludeConnection:$IgnoreConnectionHeader -IncludeDevice:$IgnoreDeviceHeaders

# Per-endpoint request counts and header-name inventories for each trace.
$gephyrByEndpoint = Get-EndpointStats -Records $gephyr
$knownByEndpoint = Get-EndpointStats -Records $known

# Union of the endpoints seen in either trace. Each key collection is turned
# into an array first so "+" is plain array concatenation.
$allEndpoints = @(@($gephyrByEndpoint.Keys) + @($knownByEndpoint.Keys) | Sort-Object -Unique)
$endpointComparisons = @()

foreach ($endpoint in $allEndpoints) {
    $gephyrHeaders = @()
    $knownHeaders = @()
    $gephyrCount = 0
    $knownCount = 0
    if ($gephyrByEndpoint.ContainsKey($endpoint)) {
        $gephyrCount = [int]$gephyrByEndpoint[$endpoint].count
        $gephyrHeaders = @($gephyrByEndpoint[$endpoint].headers | Sort-Object)
    }
    if ($knownByEndpoint.ContainsKey($endpoint)) {
        $knownCount = [int]$knownByEndpoint[$endpoint].count
        $knownHeaders = @($knownByEndpoint[$endpoint].headers | Sort-Object)
    }

    # Ignored headers are removed from both sides before diffing; the full
    # inventories above are still reported.
    $knownHeadersCompared = @($knownHeaders | Where-Object { -not $ignoreSet.Contains($_) } | Sort-Object)
    $gephyrHeadersCompared = @($gephyrHeaders | Where-Object { -not $ignoreSet.Contains($_) } | Sort-Object)

    $missingInGephyr = @($knownHeadersCompared | Where-Object { $_ -notin $gephyrHeadersCompared } | Sort-Object)
    $extraInGephyr = @($gephyrHeadersCompared | Where-Object { $_ -notin $knownHeadersCompared } | Sort-Object)
    # Wrapped in @() so the field is always an array in the JSON report, like
    # the missing/extra lists, instead of null or a bare string.
    $blockedInGephyr = @(Get-BlockedHeaders -HeaderNames $gephyrHeaders)

    $classification = "matched_or_extra_only"
    if ($knownCount -gt 0 -and $gephyrCount -eq 0) {
        $classification = "missing_endpoint_not_exercised"
    } elseif ($knownCount -gt 0 -and $gephyrCount -gt 0 -and $missingInGephyr.Count -gt 0) {
        $classification = "missing_headers_on_exercised_endpoint"
    } elseif ($knownCount -eq 0 -and $gephyrCount -gt 0) {
        $classification = "extra_endpoint_in_gephyr"
    }

    # If the endpoint was not present in known-good, listing "extra" headers is misleading noise
    # (it just reflects that there was no baseline). Keep header inventories in JSON, but
    # suppress the derived extra/missing lists for clarity.
    if ($classification -eq "extra_endpoint_in_gephyr") {
        $missingInGephyr = @()
        $extraInGephyr = @()
    }

    $endpointComparisons += [pscustomobject]@{
        endpoint           = $endpoint
        classification     = $classification
        known_request_count = $knownCount
        gephyr_request_count = $gephyrCount
        known_header_names = $knownHeaders
        gephyr_header_names = $gephyrHeaders
        missing_in_gephyr  = $missingInGephyr
        extra_in_gephyr    = $extraInGephyr
        blocked_in_gephyr  = $blockedInGephyr
    }
}

# Number of endpoints per classification. Wrapped in @() so the JSON field is
# an array even when there are zero or one classifications.
$classificationSummary = @(
    $endpointComparisons |
        Group-Object classification |
        Sort-Object Name |
        ForEach-Object {
            [pscustomobject]@{
                classification = $_.Name
                count = $_.Count
            }
        }
)

$report = [pscustomobject]@{
    generated_at = (Get-Date).ToString("o")
    gephyr_path = $GephyrPath
    known_good_path = $KnownGoodPath
    # @() makes the count correct whether the trace variable holds nothing,
    # a single record, or an array of records.
    gephyr_records = @($gephyr).Count
    known_good_records = @($known).Count
    endpoint_count = $allEndpoints.Count
    ignored_headers = @($ignoreSet | Sort-Object)
    classification_summary = $classificationSummary
    endpoints = $endpointComparisons
}

# Set-Content does not create missing directories, so make sure the target
# folder exists before writing.
$jsonDir = Split-Path -Parent $OutJson
if ($jsonDir -and -not (Test-Path $jsonDir)) {
    New-Item -ItemType Directory -Path $jsonDir -Force | Out-Null
}
$report | ConvertTo-Json -Depth 8 | Set-Content -Path $OutJson

# Render the same report as plain text: a summary header followed by one
# section per endpoint.
$lines = @()
$lines += "Google Trace Diff Report"
$lines += "Generated: $($report.generated_at)"
$lines += "Gephyr records: $($report.gephyr_records)"
$lines += "Known-good records: $($report.known_good_records)"
$lines += "Endpoints compared: $($report.endpoint_count)"
$lines += "Ignored headers in diff: $((@($report.ignored_headers) -join ', '))"
$lines += "Classification summary:"
foreach ($c in $classificationSummary) {
    $lines += "  $($c.classification): $($c.count)"
}
$lines += ""
if ($knownIsEmpty) {
    $lines += "WARNING: No known-good Google HTTP requests were parsed."
    $lines += "For Fiddler SAZ captures, enable HTTPS decryption and re-capture."
    $lines += ""
}
$lines += ""
foreach ($e in $endpointComparisons) {
    $lines += "Endpoint: $($e.endpoint)"
    $lines += "  classification: $($e.classification)"
    $lines += "  exercised_known_good: $($e.known_request_count)"
    $lines += "  exercised_gephyr: $($e.gephyr_request_count)"
    if ($e.classification -eq "extra_endpoint_in_gephyr") {
        # No baseline exists for this endpoint, so the diff lists are left blank.
        $lines += "  missing_in_gephyr: "
        $lines += "  extra_in_gephyr: "
        $lines += "  note: endpoint not present in known-good capture; recapture known-good to diff headers."
    } else {
        $lines += "  missing_in_gephyr: $((@($e.missing_in_gephyr) -join ', '))"
        $lines += "  extra_in_gephyr: $((@($e.extra_in_gephyr) -join ', '))"
    }
    $lines += "  blocked_in_gephyr: $((@($e.blocked_in_gephyr) -join ', '))"
    $lines += ""
}

# Set-Content does not create missing directories, so make sure the target
# folder exists before writing.
$textDir = Split-Path -Parent $OutText
if ($textDir -and -not (Test-Path $textDir)) {
    New-Item -ItemType Directory -Path $textDir -Force | Out-Null
}
$lines | Set-Content -Path $OutText

Write-Output "Saved JSON report: $OutJson"
Write-Output "Saved text report: $OutText"